diff --git a/quaddtype/README.md b/quaddtype/README.md
index e69de29b..60693cb9 100644
--- a/quaddtype/README.md
+++ b/quaddtype/README.md
@@ -0,0 +1,22 @@
+# Numpy-QuadDType
+
+## Installation
+
+```bash
+pip install numpy==2.1.0
+pip install -i https://test.pypi.org/simple/ quaddtype
+```
+
+## Usage
+
+```python
+import numpy as np
+from numpy_quaddtype import QuadPrecDType, QuadPrecision
+
+# using the SLEEF backend (default)
+np.array([1,2,3], dtype=QuadPrecDType())
+np.array([1,2,3], dtype=QuadPrecDType("sleef"))
+
+# using the longdouble backend
+np.array([1,2,3], dtype=QuadPrecDType("longdouble"))
+```
diff --git a/quaddtype/meson.build b/quaddtype/meson.build
index f1f9eab1..d6e651b8 100644
--- a/quaddtype/meson.build
+++ b/quaddtype/meson.build
@@ -1,4 +1,4 @@
-project('quaddtype', 'c', 'cpp', default_options : ['cpp_std=c++17', 'b_pie=true'])
+project('numpy_quaddtype', 'c', 'cpp', default_options : ['cpp_std=c++17', 'b_pie=true'])
 
 py_mod = import('python')
 py = py_mod.find_installation()
@@ -19,30 +19,33 @@ incdir_numpy = run_command(py,
 
 includes = include_directories(
     [
         incdir_numpy,
-        'quaddtype/src',
+        'numpy_quaddtype/src',
     ]
 )
 
 srcs = [
-    'quaddtype/src/casts.h',
-    'quaddtype/src/casts.cpp',
-    'quaddtype/src/scalar.h',
-    'quaddtype/src/scalar.c',
-    'quaddtype/src/dtype.h',
-    'quaddtype/src/dtype.c',
-    'quaddtype/src/quaddtype_main.c',
-    'quaddtype/src/scalar_ops.h',
-    'quaddtype/src/scalar_ops.cpp',
-    'quaddtype/src/ops.hpp',
-    'quaddtype/src/umath.h',
-    'quaddtype/src/umath.cpp'
+    'numpy_quaddtype/src/quad_common.h',
+    'numpy_quaddtype/src/casts.h',
+    'numpy_quaddtype/src/casts.cpp',
+    'numpy_quaddtype/src/scalar.h',
+    'numpy_quaddtype/src/scalar.c',
+    'numpy_quaddtype/src/dtype.h',
+    'numpy_quaddtype/src/dtype.c',
+    'numpy_quaddtype/src/quaddtype_main.c',
+    'numpy_quaddtype/src/scalar_ops.h',
+    'numpy_quaddtype/src/scalar_ops.cpp',
+    'numpy_quaddtype/src/ops.hpp',
+    'numpy_quaddtype/src/umath.h',
+    'numpy_quaddtype/src/umath.cpp',
+    'numpy_quaddtype/src/dragon4.h',
+    'numpy_quaddtype/src/dragon4.c'
 ]
 
 py.install_sources(
     [
-        'quaddtype/__init__.py',
+        'numpy_quaddtype/__init__.py',
     ],
-    subdir: 'quaddtype',
+    subdir: 'numpy_quaddtype',
     pure: false
 )
@@ -51,6 +54,6 @@ srcs,
 c_args: ['-g', '-O0', '-lsleef', '-lsleefquad'],
 dependencies: [sleef_dep, sleefquad_dep],
 install: true,
-subdir: 'quaddtype',
+subdir: 'numpy_quaddtype',
 include_directories: includes
 )
\ No newline at end of file
diff --git a/quaddtype/numpy_quaddtype/__init__.py b/quaddtype/numpy_quaddtype/__init__.py
new file mode 100644
index 00000000..e469a4c1
--- /dev/null
+++ b/quaddtype/numpy_quaddtype/__init__.py
@@ -0,0 +1,34 @@
+from ._quaddtype_main import (
+    QuadPrecision,
+    QuadPrecDType,
+    is_longdouble_128,
+    get_sleef_constant
+)
+
+__all__ = [
+    'QuadPrecision', 'QuadPrecDType', 'SleefQuadPrecision', 'LongDoubleQuadPrecision',
+    'SleefQuadPrecDType', 'LongDoubleQuadPrecDType', 'is_longdouble_128', 'pi', 'e',
+    'log2e', 'log10e', 'ln2', 'ln10', 'max_value', 'min_value', 'epsilon'
+]
+
+def SleefQuadPrecision(value):
+    return QuadPrecision(value, backend='sleef')
+
+def LongDoubleQuadPrecision(value):
+    return QuadPrecision(value, backend='longdouble')
+
+def SleefQuadPrecDType():
+    return QuadPrecDType(backend='sleef')
+
+def LongDoubleQuadPrecDType():
+    return QuadPrecDType(backend='longdouble')
+
+pi = get_sleef_constant("pi")
+e = get_sleef_constant("e")
+log2e = get_sleef_constant("log2e")
+log10e = get_sleef_constant("log10e")
+ln2 = get_sleef_constant("ln2")
+ln10 = get_sleef_constant("ln10")
get_sleef_constant("quad_max") +min_value = get_sleef_constant("quad_min") +epsilon = get_sleef_constant("epsilon") \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp new file mode 100644 index 00000000..c662b4d5 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/casts.cpp @@ -0,0 +1,798 @@ +#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API +#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API +#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION +#define NPY_TARGET_VERSION NPY_2_0_API_VERSION +#define NO_IMPORT_ARRAY +#define NO_IMPORT_UFUNC + +extern "C" { +#include + +#include "numpy/arrayobject.h" +#include "numpy/ndarraytypes.h" +#include "numpy/dtype_api.h" +} +#include "sleef.h" +#include "sleefquad.h" + +#include "quad_common.h" +#include "scalar.h" +#include "casts.h" +#include "dtype.h" + +#define NUM_CASTS 29 // 14 to_casts + 14 from_casts + 1 quad_to_quad + +static NPY_CASTING +quad_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), + PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]), + QuadPrecDTypeObject *given_descrs[2], + QuadPrecDTypeObject *loop_descrs[2], npy_intp *view_offset) +{ + NPY_CASTING casting = NPY_NO_CASTING; + + if (given_descrs[0]->backend != given_descrs[1]->backend) + casting = NPY_UNSAFE_CASTING; + + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + if (given_descrs[1] == NULL) { + Py_INCREF(given_descrs[0]); + loop_descrs[1] = given_descrs[0]; + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + *view_offset = 0; + return casting; +} + +static int +quad_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1]; + + // inter-backend casting + if (descr_in->backend != descr_out->backend) { + while (N--) { + quad_value in_val, out_val; + if (descr_in->backend == BACKEND_SLEEF) { + memcpy(&in_val.sleef_value, in_ptr, sizeof(Sleef_quad)); + out_val.longdouble_value = Sleef_cast_to_doubleq1(in_val.sleef_value); + } + else { + memcpy(&in_val.longdouble_value, in_ptr, sizeof(long double)); + out_val.sleef_value = Sleef_cast_from_doubleq1(in_val.longdouble_value); + } + memcpy(out_ptr, &out_val, + (descr_out->backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) + : sizeof(long double)); + in_ptr += in_stride; + out_ptr += out_stride; + } + + return 0; + } + + size_t elem_size = + (descr_in->backend == BACKEND_SLEEF) ? 
+static int
+quad_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
+                                    npy_intp const dimensions[], npy_intp const strides[],
+                                    void *NPY_UNUSED(auxdata))
+{
+    npy_intp N = dimensions[0];
+    char *in_ptr = data[0];
+    char *out_ptr = data[1];
+    npy_intp in_stride = strides[0];
+    npy_intp out_stride = strides[1];
+
+    QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)context->descriptors[0];
+    QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1];
+
+    // inter-backend casting
+    if (descr_in->backend != descr_out->backend) {
+        while (N--) {
+            quad_value in_val, out_val;
+            if (descr_in->backend == BACKEND_SLEEF) {
+                memcpy(&in_val.sleef_value, in_ptr, sizeof(Sleef_quad));
+                out_val.longdouble_value = Sleef_cast_to_doubleq1(in_val.sleef_value);
+            }
+            else {
+                memcpy(&in_val.longdouble_value, in_ptr, sizeof(long double));
+                out_val.sleef_value = Sleef_cast_from_doubleq1(in_val.longdouble_value);
+            }
+            memcpy(out_ptr, &out_val,
+                   (descr_out->backend == BACKEND_SLEEF) ? sizeof(Sleef_quad)
+                                                         : sizeof(long double));
+            in_ptr += in_stride;
+            out_ptr += out_stride;
+        }
+
+        return 0;
+    }
+
+    size_t elem_size =
+            (descr_in->backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
+
+    while (N--) {
+        memcpy(out_ptr, in_ptr, elem_size);
+        in_ptr += in_stride;
+        out_ptr += out_stride;
+    }
+    return 0;
+}
+
+static int
+quad_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
+                                  npy_intp const dimensions[], npy_intp const strides[],
+                                  void *NPY_UNUSED(auxdata))
+{
+    npy_intp N = dimensions[0];
+    char *in_ptr = data[0];
+    char *out_ptr = data[1];
+    npy_intp in_stride = strides[0];
+    npy_intp out_stride = strides[1];
+
+    QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)context->descriptors[0];
+    QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1];
+
+    // inter-backend casting
+    if (descr_in->backend != descr_out->backend) {
+        if (descr_in->backend == BACKEND_SLEEF) {
+            while (N--) {
+                Sleef_quad in_val = *(Sleef_quad *)in_ptr;
+                *(long double *)out_ptr = Sleef_cast_to_doubleq1(in_val);
+                in_ptr += in_stride;
+                out_ptr += out_stride;
+            }
+        }
+        else {
+            while (N--) {
+                long double in_val = *(long double *)in_ptr;
+                *(Sleef_quad *)out_ptr = Sleef_cast_from_doubleq1(in_val);
+                in_ptr += in_stride;
+                out_ptr += out_stride;
+            }
+        }
+
+        return 0;
+    }
+
+    if (descr_in->backend == BACKEND_SLEEF) {
+        while (N--) {
+            *(Sleef_quad *)out_ptr = *(Sleef_quad *)in_ptr;
+            in_ptr += in_stride;
+            out_ptr += out_stride;
+        }
+    }
+    else {
+        while (N--) {
+            *(long double *)out_ptr = *(long double *)in_ptr;
+            in_ptr += in_stride;
+            out_ptr += out_stride;
+        }
+    }
+
+    return 0;
+}
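The `_aligned`/`_unaligned` pairs above (and throughout this file) exist because the cast specs are registered later with `NPY_METH_SUPPORTS_UNALIGNED`: NumPy may hand the unaligned loop a buffer where `*(Sleef_quad *)ptr` would be undefined behavior, and a byte-wise `memcpy` is the portable way to move such elements. The idiom, distilled into a runnable sketch:

```cpp
// Sketch of the unaligned load/store idiom used by the *_unaligned loops.
#include <cstring>
#include <cstdio>

template <typename T>
static T load_unaligned(const char *p)
{
    T v;
    std::memcpy(&v, p, sizeof(T));  // byte copy imposes no alignment requirement
    return v;
}

template <typename T>
static void store_unaligned(char *p, const T &v)
{
    std::memcpy(p, &v, sizeof(T));
}

int main()
{
    char buf[sizeof(double) + 1];
    store_unaligned(buf + 1, 3.14);  // deliberately misaligned address
    std::printf("%f\n", load_unaligned<double>(buf + 1));  // 3.140000
    return 0;
}
```

The aligned variants can dereference and assign directly, letting the compiler emit plain loads and stores.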
+// Casting from other types to QuadDType
+
+template <typename T>
+static inline quad_value
+to_quad(T x, QuadBackendType backend);
+
+template <>
+inline quad_value
+to_quad(npy_bool x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = x ? Sleef_cast_from_doubleq1(1.0) : Sleef_cast_from_doubleq1(0.0);
+    }
+    else {
+        result.longdouble_value = x ? 1.0L : 0.0L;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad(npy_byte x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_int64q1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad(npy_short x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_int64q1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad(npy_ushort x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_uint64q1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad(npy_int x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_int64q1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad(npy_uint x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_uint64q1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad(npy_long x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_int64q1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad(npy_ulong x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_uint64q1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad(npy_longlong x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_int64q1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad(npy_ulonglong x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_uint64q1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad(float x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_doubleq1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad(double x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_doubleq1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad(long double x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_doubleq1(x);
+    }
+    else {
+        result.longdouble_value = x;
+    }
+    return result;
+}
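Each supported NumPy type gets one `to_quad` specialization, and the routing is mechanical: signed integers go through `Sleef_cast_from_int64q1`, unsigned integers through `Sleef_cast_from_uint64q1`, and floating-point types through `Sleef_cast_from_doubleq1`. Note that the `long double` specialization's SLEEF path therefore narrows to `double` on the way in; that is the data loss the TODO in `numpy_to_quad_resolve_descriptors` below alludes to. For illustration, a hypothetical `npy_half` specialization (half is not covered by this PR; `npy_half_to_double` is NumPy's converter from `numpy/halffloat.h`) would follow the same shape:

```cpp
// Hypothetical sketch only -- npy_half is not part of this diff.
template <>
inline quad_value
to_quad(npy_half x, QuadBackendType backend)
{
    quad_value result;
    double wide = npy_half_to_double(x);  // a half always fits in a double exactly
    if (backend == BACKEND_SLEEF) {
        result.sleef_value = Sleef_cast_from_doubleq1(wide);
    }
    else {
        result.longdouble_value = (long double)wide;
    }
    return result;
}
```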
+template <typename T>
+static NPY_CASTING
+numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2],
+                                  PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2],
+                                  npy_intp *view_offset)
+{
+    // TODO: with no output descriptor this defaults to the SLEEF backend,
+    // losing data and getting 0 for some long double inputs
+    if (given_descrs[1] == NULL) {
+        loop_descrs[1] = (PyArray_Descr *)new_quaddtype_instance(BACKEND_SLEEF);
+        if (loop_descrs[1] == nullptr) {
+            return (NPY_CASTING)-1;
+        }
+    }
+    else {
+        Py_INCREF(given_descrs[1]);
+        loop_descrs[1] = given_descrs[1];
+    }
+
+    loop_descrs[0] = PyArray_GetDefaultDescr(dtypes[0]);
+    return NPY_SAFE_CASTING;
+}
+
+template <typename T>
+static int
+numpy_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
+                                     npy_intp const dimensions[], npy_intp const strides[],
+                                     void *NPY_UNUSED(auxdata))
+{
+    npy_intp N = dimensions[0];
+    char *in_ptr = data[0];
+    char *out_ptr = data[1];
+
+    QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1];
+    QuadBackendType backend = descr_out->backend;
+    size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
+
+    while (N--) {
+        T in_val;
+        quad_value out_val;
+
+        memcpy(&in_val, in_ptr, sizeof(T));
+        out_val = to_quad(in_val, backend);
+        memcpy(out_ptr, &out_val, elem_size);
+
+        in_ptr += strides[0];
+        out_ptr += strides[1];
+    }
+    return 0;
+}
+
+template <typename T>
+static int
+numpy_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
+                                   npy_intp const dimensions[], npy_intp const strides[],
+                                   void *NPY_UNUSED(auxdata))
+{
+    npy_intp N = dimensions[0];
+    char *in_ptr = data[0];
+    char *out_ptr = data[1];
+
+    QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1];
+    QuadBackendType backend = descr_out->backend;
+
+    while (N--) {
+        T in_val = *(T *)in_ptr;
+        quad_value out_val = to_quad(in_val, backend);
+
+        if (backend == BACKEND_SLEEF) {
+            *(Sleef_quad *)(out_ptr) = out_val.sleef_value;
+        }
+        else {
+            *(long double *)(out_ptr) = out_val.longdouble_value;
+        }
+
+        in_ptr += strides[0];
+        out_ptr += strides[1];
+    }
+    return 0;
+}
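`numpy_to_quad_resolve_descriptors` reports `NPY_SAFE_CASTING` because a binary128 significand (113 bits) holds every source value here exactly, including 64-bit integers that a `double` cannot represent; the lone caveat is the `long double` SLEEF path above, which loses bits only because the implementation funnels through `double`, not because binary128 is too small. A standalone check of the exactness claim, using the same SLEEF casts as this file:

```cpp
// Sketch: int64 -> quad -> int64 round-trips exactly, unlike int64 -> double.
#include <sleefquad.h>
#include <cstdio>

int main()
{
    long long big = 9007199254740993LL;  // 2^53 + 1, not representable as double
    Sleef_quad q = Sleef_cast_from_int64q1(big);
    std::printf("exact? %d\n", Sleef_cast_to_int64q1(q) == big);  // prints 1
    return 0;
}
```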
+// Casting from QuadDType to other types
+
+template <typename T>
+static inline T
+from_quad(quad_value x, QuadBackendType backend);
+
+template <>
+inline npy_bool
+from_quad<npy_bool>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return Sleef_cast_to_int64q1(x.sleef_value) != 0;
+    }
+    else {
+        return x.longdouble_value != 0;
+    }
+}
+
+template <>
+inline npy_byte
+from_quad<npy_byte>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return (npy_byte)Sleef_cast_to_int64q1(x.sleef_value);
+    }
+    else {
+        return (npy_byte)x.longdouble_value;
+    }
+}
+
+template <>
+inline npy_short
+from_quad<npy_short>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return (npy_short)Sleef_cast_to_int64q1(x.sleef_value);
+    }
+    else {
+        return (npy_short)x.longdouble_value;
+    }
+}
+
+template <>
+inline npy_ushort
+from_quad<npy_ushort>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return (npy_ushort)Sleef_cast_to_uint64q1(x.sleef_value);
+    }
+    else {
+        return (npy_ushort)x.longdouble_value;
+    }
+}
+
+template <>
+inline npy_int
+from_quad<npy_int>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return (npy_int)Sleef_cast_to_int64q1(x.sleef_value);
+    }
+    else {
+        return (npy_int)x.longdouble_value;
+    }
+}
+
+template <>
+inline npy_uint
+from_quad<npy_uint>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return (npy_uint)Sleef_cast_to_uint64q1(x.sleef_value);
+    }
+    else {
+        return (npy_uint)x.longdouble_value;
+    }
+}
+
+template <>
+inline npy_long
+from_quad<npy_long>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return (npy_long)Sleef_cast_to_int64q1(x.sleef_value);
+    }
+    else {
+        return (npy_long)x.longdouble_value;
+    }
+}
+
+template <>
+inline npy_ulong
+from_quad<npy_ulong>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return (npy_ulong)Sleef_cast_to_uint64q1(x.sleef_value);
+    }
+    else {
+        return (npy_ulong)x.longdouble_value;
+    }
+}
+
+template <>
+inline npy_longlong
+from_quad<npy_longlong>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return Sleef_cast_to_int64q1(x.sleef_value);
+    }
+    else {
+        return (npy_longlong)x.longdouble_value;
+    }
+}
+
+template <>
+inline npy_ulonglong
+from_quad<npy_ulonglong>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return Sleef_cast_to_uint64q1(x.sleef_value);
+    }
+    else {
+        return (npy_ulonglong)x.longdouble_value;
+    }
+}
+
+template <>
+inline float
+from_quad<float>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return (float)Sleef_cast_to_doubleq1(x.sleef_value);
+    }
+    else {
+        return (float)x.longdouble_value;
+    }
+}
+
+template <>
+inline double
+from_quad<double>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return Sleef_cast_to_doubleq1(x.sleef_value);
+    }
+    else {
+        return (double)x.longdouble_value;
+    }
+}
+
+template <>
+inline long double
+from_quad<long double>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return (long double)Sleef_cast_to_doubleq1(x.sleef_value);
+    }
+    else {
+        return x.longdouble_value;
+    }
+}
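One behavioral wrinkle in the `npy_bool` specialization above: the two backends disagree on fractional input. The SLEEF path converts to `int64` before comparing with zero, so any value strictly between -1 and 1 (other than 0) maps to false, while the longdouble path compares the value directly and maps it to true. A sketch of the discrepancy (same SLEEF calls as this file):

```cpp
// Sketch: from_quad for npy_bool disagrees across backends on 0.5.
#include <sleefquad.h>
#include <cstdio>

int main()
{
    Sleef_quad half = Sleef_cast_from_doubleq1(0.5);
    int sleef_path = Sleef_cast_to_int64q1(half) != 0;  // 0: integer-converts first
    int ldbl_path = (0.5L != 0.0L);                     // 1: compares directly
    std::printf("%d vs %d\n", sleef_path, ldbl_path);   // prints "0 vs 1"
    return 0;
}
```

Comparing against a quad zero on the SLEEF path would make the backends agree.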
+template <typename T>
+static NPY_CASTING
+quad_to_numpy_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2],
+                                  PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2],
+                                  npy_intp *view_offset)
+{
+    Py_INCREF(given_descrs[0]);
+    loop_descrs[0] = given_descrs[0];
+
+    loop_descrs[1] = PyArray_GetDefaultDescr(dtypes[1]);
+    return NPY_UNSAFE_CASTING;
+}
+
+template <typename T>
+static int
+quad_to_numpy_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
+                                     npy_intp const dimensions[], npy_intp const strides[],
+                                     void *NPY_UNUSED(auxdata))
+{
+    npy_intp N = dimensions[0];
+    char *in_ptr = data[0];
+    char *out_ptr = data[1];
+
+    QuadPrecDTypeObject *quad_descr = (QuadPrecDTypeObject *)context->descriptors[0];
+    QuadBackendType backend = quad_descr->backend;
+
+    size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
+
+    while (N--) {
+        quad_value in_val;
+        memcpy(&in_val, in_ptr, elem_size);
+
+        T out_val = from_quad<T>(in_val, backend);
+        memcpy(out_ptr, &out_val, sizeof(T));
+
+        in_ptr += strides[0];
+        out_ptr += strides[1];
+    }
+    return 0;
+}
+
+template <typename T>
+static int
+quad_to_numpy_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
+                                   npy_intp const dimensions[], npy_intp const strides[],
+                                   void *NPY_UNUSED(auxdata))
+{
+    npy_intp N = dimensions[0];
+    char *in_ptr = data[0];
+    char *out_ptr = data[1];
+
+    QuadPrecDTypeObject *quad_descr = (QuadPrecDTypeObject *)context->descriptors[0];
+    QuadBackendType backend = quad_descr->backend;
+
+    while (N--) {
+        quad_value in_val;
+        if (backend == BACKEND_SLEEF) {
+            in_val.sleef_value = *(Sleef_quad *)in_ptr;
+        }
+        else {
+            in_val.longdouble_value = *(long double *)in_ptr;
+        }
+
+        T out_val = from_quad<T>(in_val, backend);
+        *(T *)(out_ptr) = out_val;
+
+        in_ptr += strides[0];
+        out_ptr += strides[1];
+    }
+    return 0;
+}
+
+static PyArrayMethod_Spec *specs[NUM_CASTS + 1];  // +1 for NULL terminator
+static size_t spec_count = 0;
+
+void
+add_spec(PyArrayMethod_Spec *spec)
+{
+    if (spec_count < NUM_CASTS) {
+        specs[spec_count++] = spec;
+    }
+    else {
+        delete[] spec->dtypes;
+        delete[] spec->slots;
+        delete spec;
+    }
+}
+
+// functions to add casts
+template <typename T>
+void
+add_cast_from(PyArray_DTypeMeta *to)
+{
+    PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{&QuadPrecDType, to};
+
+    PyType_Slot *slots = new PyType_Slot[4]{
+            {NPY_METH_resolve_descriptors, (void *)&quad_to_numpy_resolve_descriptors<T>},
+            {NPY_METH_strided_loop, (void *)&quad_to_numpy_strided_loop_aligned<T>},
+            {NPY_METH_unaligned_strided_loop, (void *)&quad_to_numpy_strided_loop_unaligned<T>},
+            {0, nullptr}};
+
+    PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{
+            .name = "cast_QuadPrec_to_NumPy",
+            .nin = 1,
+            .nout = 1,
+            .casting = NPY_UNSAFE_CASTING,
+            .flags = NPY_METH_SUPPORTS_UNALIGNED,
+            .dtypes = dtypes,
+            .slots = slots,
+    };
+    add_spec(spec);
+}
+
+template <typename T>
+void
+add_cast_to(PyArray_DTypeMeta *from)
+{
+    PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{from, &QuadPrecDType};
+
+    PyType_Slot *slots = new PyType_Slot[4]{
+            {NPY_METH_resolve_descriptors, (void *)&numpy_to_quad_resolve_descriptors<T>},
+            {NPY_METH_strided_loop, (void *)&numpy_to_quad_strided_loop_aligned<T>},
+            {NPY_METH_unaligned_strided_loop, (void *)&numpy_to_quad_strided_loop_unaligned<T>},
+            {0, nullptr}};
+
+    PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{
+            .name = "cast_NumPy_to_QuadPrec",
+            .nin = 1,
+            .nout = 1,
+            .casting = NPY_SAFE_CASTING,
+            .flags = NPY_METH_SUPPORTS_UNALIGNED,
+            .dtypes = dtypes,
+            .slots = slots,
+    };
+
+    add_spec(spec);
+}
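`add_cast_to`/`add_cast_from` heap-allocate one `PyArrayMethod_Spec` per cast and park it in the NULL-terminated `specs` array that `init_casts()` below returns. The consumer is not part of this diff; with NumPy's public 2.x dtype API the array is normally handed over when the dtype itself is initialized, roughly as sketched here (hedged: `QuadPrecision_Type` and `quad_dt_slots` are assumed names, and the real wiring lives in dtype.c):

```cpp
// Hedged sketch of the consuming side, not shown in this diff.
PyArrayMethod_Spec **casts = init_casts();
if (casts == nullptr) {
    return -1;  // init_casts() has already set a Python error
}

PyArrayDTypeMeta_Spec spec = {
        .typeobj = &QuadPrecision_Type,  // scalar type (assumed name)
        .flags = NPY_DT_NUMERIC,
        .casts = casts,                  // every spec built by this file
        .slots = quad_dt_slots,          // dtype slot table (assumed name)
};
if (PyArrayInitDTypeMeta_FromSpec(&QuadPrecDType, &spec) < 0) {
    return -1;
}
free_casts();  // assuming NumPy copied what it needs during init
```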
+PyArrayMethod_Spec **
+init_casts_internal(void)
+{
+    PyArray_DTypeMeta **quad2quad_dtypes = new PyArray_DTypeMeta *[2]{nullptr, nullptr};
+    PyType_Slot *quad2quad_slots = new PyType_Slot[4]{
+            {NPY_METH_resolve_descriptors, (void *)&quad_to_quad_resolve_descriptors},
+            {NPY_METH_strided_loop, (void *)&quad_to_quad_strided_loop_aligned},
+            {NPY_METH_unaligned_strided_loop, (void *)&quad_to_quad_strided_loop_unaligned},
+            {0, nullptr}};
+
+    PyArrayMethod_Spec *quad2quad_spec = new PyArrayMethod_Spec{
+            .name = "cast_QuadPrec_to_QuadPrec",
+            .nin = 1,
+            .nout = 1,
+            .casting = NPY_UNSAFE_CASTING,  // since SLEEF -> ld might lose precision
+            .flags = NPY_METH_SUPPORTS_UNALIGNED,
+            .dtypes = quad2quad_dtypes,
+            .slots = quad2quad_slots,
+    };
+
+    add_spec(quad2quad_spec);
+
+    add_cast_to<npy_bool>(&PyArray_BoolDType);
+    add_cast_to<npy_byte>(&PyArray_ByteDType);
+    add_cast_to<npy_short>(&PyArray_ShortDType);
+    add_cast_to<npy_ushort>(&PyArray_UShortDType);
+    add_cast_to<npy_int>(&PyArray_IntDType);
+    add_cast_to<npy_uint>(&PyArray_UIntDType);
+    add_cast_to<npy_long>(&PyArray_LongDType);
+    add_cast_to<npy_ulong>(&PyArray_ULongDType);
+    add_cast_to<npy_longlong>(&PyArray_LongLongDType);
+    add_cast_to<npy_ulonglong>(&PyArray_ULongLongDType);
+    add_cast_to<float>(&PyArray_FloatDType);
+    add_cast_to<double>(&PyArray_DoubleDType);
+    add_cast_to<long double>(&PyArray_LongDoubleDType);
+
+    add_cast_from<npy_bool>(&PyArray_BoolDType);
+    add_cast_from<npy_byte>(&PyArray_ByteDType);
+    add_cast_from<npy_short>(&PyArray_ShortDType);
+    add_cast_from<npy_ushort>(&PyArray_UShortDType);
+    add_cast_from<npy_int>(&PyArray_IntDType);
+    add_cast_from<npy_uint>(&PyArray_UIntDType);
+    add_cast_from<npy_long>(&PyArray_LongDType);
+    add_cast_from<npy_ulong>(&PyArray_ULongDType);
+    add_cast_from<npy_longlong>(&PyArray_LongLongDType);
+    add_cast_from<npy_ulonglong>(&PyArray_ULongLongDType);
+    add_cast_from<float>(&PyArray_FloatDType);
+    add_cast_from<double>(&PyArray_DoubleDType);
+    add_cast_from<long double>(&PyArray_LongDoubleDType);
+
+    specs[spec_count] = nullptr;
+    return specs;
+}
+
+PyArrayMethod_Spec **
+init_casts(void)
+{
+    try {
+        return init_casts_internal();
+    }
+    catch (const std::bad_alloc &) {
+        PyErr_NoMemory();
+        return nullptr;
+    }
+}
+
+void
+free_casts(void)
+{
+    for (size_t i = 0; i < spec_count; i++) {
+        if (specs[i]) {
+            delete[] specs[i]->dtypes;
+            delete[] specs[i]->slots;
+            delete specs[i];
+            specs[i] = nullptr;
+        }
+    }
+    spec_count = 0;
+}
\ No newline at end of file
diff --git a/quaddtype/quaddtype/src/casts.h b/quaddtype/numpy_quaddtype/src/casts.h
similarity index 100%
rename from quaddtype/quaddtype/src/casts.h
rename to quaddtype/numpy_quaddtype/src/casts.h
diff --git a/quaddtype/numpy_quaddtype/src/dragon4.c b/quaddtype/numpy_quaddtype/src/dragon4.c
new file mode 100644
index 00000000..34ad4cbb
--- /dev/null
+++ b/quaddtype/numpy_quaddtype/src/dragon4.c
@@ -0,0 +1,2024 @@
+/*
+This code was extracted from NumPy; the original author is Allan Haldane (@ahaldane).
+Modifications are specific to supporting the SLEEF_QUAD type.
+*/
+
+#include <Python.h>
+#include <assert.h>
+#include <float.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API
+#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API
+#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION
+#define NPY_TARGET_VERSION NPY_2_0_API_VERSION
+#define NO_IMPORT_ARRAY
+#define NO_IMPORT_UFUNC
+
+#include "dragon4.h"
+#include "dtype.h"
+#include "scalar.h"
+
+#if 0
+#define DEBUG_ASSERT(stmnt) assert(stmnt)
+#else
+#define DEBUG_ASSERT(stmnt) \
+    do {                    \
+    } while (0)
+#endif
+
+#define c_BigInt_MaxBlocks 1023
+#define BIGINT_DRAGON4_GROUPSIZE 7
+
+typedef struct BigInt {
+    npy_uint32 length;
+    npy_uint32 blocks[c_BigInt_MaxBlocks];
+} BigInt;
+
+typedef struct {
+    BigInt bigints[BIGINT_DRAGON4_GROUPSIZE];
+    char repr[16384];
+} Dragon4_Scratch;
+
+static NPY_TLS Dragon4_Scratch _bigint_static;
+
+static inline npy_uint64
+bitmask_u64(npy_uint32 n)
+{
+    return ~(~((npy_uint64)0) << n);
+}
+
+static inline npy_uint32
+bitmask_u32(npy_uint32 n)
+{
+    return ~(~((npy_uint32)0) << n);
+}
+
+/* result = result * 10 */
+static void
+BigInt_Multiply10(BigInt *result)
+{
+    /* multiply all the blocks */
+    npy_uint64 carry = 0;
+
+    npy_uint32 *cur = result->blocks;
+    npy_uint32 *end = result->blocks + result->length;
+    for (; cur != end; ++cur) {
+        npy_uint64 product = (npy_uint64)(*cur) * 10ull + carry;
+        (*cur) = (npy_uint32)(product & bitmask_u64(32));
+        carry = product >> 32;
+    }
+
+    if (carry != 0) {
+        /* grow the array */
+        DEBUG_ASSERT(result->length + 1 <= c_BigInt_MaxBlocks);
+        *cur = (npy_uint32)carry;
+        ++result->length;
+    }
+} + +static npy_uint32 g_PowerOf10_U32[] = { + 1, /* 10 ^ 0 */ + 10, /* 10 ^ 1 */ + 100, /* 10 ^ 2 */ + 1000, /* 10 ^ 3 */ + 10000, /* 10 ^ 4 */ + 100000, /* 10 ^ 5 */ + 1000000, /* 10 ^ 6 */ + 10000000, /* 10 ^ 7 */ +}; + +/* + * Note: This has a lot of wasted space in the big integer structures of the + * early table entries. It wouldn't be terribly hard to make the multiply + * function work on integer pointers with an array length instead of + * the BigInt struct which would allow us to store a minimal amount of + * data here. + */ +static BigInt g_PowerOf10_Big[] = { + /* 10 ^ 8 */ + {1, {100000000}}, + /* 10 ^ 16 */ + {2, {0x6fc10000, 0x002386f2}}, + /* 10 ^ 32 */ + {4, + { + 0x00000000, + 0x85acef81, + 0x2d6d415b, + 0x000004ee, + }}, + /* 10 ^ 64 */ + {7, + { + 0x00000000, + 0x00000000, + 0xbf6a1f01, + 0x6e38ed64, + 0xdaa797ed, + 0xe93ff9f4, + 0x00184f03, + }}, + /* 10 ^ 128 */ + {14, + { + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x2e953e01, + 0x03df9909, + 0x0f1538fd, + 0x2374e42f, + 0xd3cff5ec, + 0xc404dc08, + 0xbccdb0da, + 0xa6337f19, + 0xe91f2603, + 0x0000024e, + }}, + /* 10 ^ 256 */ + {27, + { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x982e7c01, 0xbed3875b, 0xd8d99f72, 0x12152f87, 0x6bde50c6, 0xcf4a6e70, + 0xd595d80f, 0x26b2716e, 0xadc666b0, 0x1d153624, 0x3c42d35a, 0x63ff540e, 0xcc5573c0, + 0x65f9ef17, 0x55bc28f2, 0x80dcc7f7, 0xf46eeddc, 0x5fdcefce, 0x000553f7, + }}, + /* 10 ^ 512 */ + {54, + { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0xfc6cf801, 0x77f27267, 0x8f9546dc, 0x5d96976f, 0xb83a8a97, + 0xc31e1ad9, 0x46c40513, 0x94e65747, 0xc88976c1, 0x4475b579, 0x28f8733b, 0xaa1da1bf, + 0x703ed321, 0x1e25cfea, 0xb21a2f22, 0xbc51fb2e, 0x96e14f5d, 0xbfa3edac, 0x329c57ae, + 0xe7fc7153, 0xc3fc0695, 0x85a91924, 0xf95f635e, 0xb2908ee0, 0x93abade4, 0x1366732a, + 0x9449775c, 0x69be5b0e, 0x7343afac, 0xb099bc81, 0x45a71d46, 0xa2699748, 0x8cb07303, + 0x8a0b1f13, 0x8cab8a97, 0xc1d238d9, 0x633415d4, 0x0000001c, + }}, + /* 10 ^ 1024 */ + {107, + { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2919f001, 0xf55b2b72, 0x6e7c215b, + 0x1ec29f86, 0x991c4e87, 0x15c51a88, 0x140ac535, 0x4c7d1e1a, 0xcc2cd819, 0x0ed1440e, + 0x896634ee, 0x7de16cfb, 0x1e43f61f, 0x9fce837d, 0x231d2b9c, 0x233e55c7, 0x65dc60d7, + 0xf451218b, 0x1c5cd134, 0xc9635986, 0x922bbb9f, 0xa7e89431, 0x9f9f2a07, 0x62be695a, + 0x8e1042c4, 0x045b7a74, 0x1abe1de3, 0x8ad822a5, 0xba34c411, 0xd814b505, 0xbf3fdeb3, + 0x8fc51a16, 0xb1b896bc, 0xf56deeec, 0x31fb6bfd, 0xb6f4654b, 0x101a3616, 0x6b7595fb, + 0xdc1a47fe, 0x80d98089, 0x80bda5a5, 0x9a202882, 0x31eb0f66, 0xfc8f1f90, 0x976a3310, + 0xe26a7b7e, 0xdf68368a, 0x3ce3a0b8, 0x8e4262ce, 0x75a351a2, 0x6cb0b6c9, 0x44597583, + 0x31b5653f, 0xc356e38a, 0x35faaba6, 0x0190fba0, 0x9fc4ed52, 0x88bc491b, 0x1640114a, + 0x005b8041, 0xf4f3235e, 0x1e8d4649, 0x36a8de06, 0x73c55349, 0xa7e6bd2a, 0xc1a6970c, + 0x47187094, 0xd2db49ef, 0x926c3f5b, 0xae6209d4, 0x2d433949, 0x34f4a3c6, 0xd4305d94, + 0xd9d61a05, 0x00000325, + }}, + /* 10 ^ 2048 */ + {213, 
+ { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x1333e001, 0xe3096865, 0xb27d4d3f, 0x49e28dcf, 0xec2e4721, 0xee87e354, + 0xb6067584, 0x368b8abb, 0xa5e5a191, 0x2ed56d55, 0xfd827773, 0xea50d142, 0x51b78db2, + 0x98342c9e, 0xc850dabc, 0x866ed6f1, 0x19342c12, 0x92794987, 0xd2f869c2, 0x66912e4a, + 0x71c7fd8f, 0x57a7842d, 0x235552eb, 0xfb7fedcc, 0xf3861ce0, 0x38209ce1, 0x9713b449, + 0x34c10134, 0x8c6c54de, 0xa7a8289c, 0x2dbb6643, 0xe3cb64f3, 0x8074ff01, 0xe3892ee9, + 0x10c17f94, 0xa8f16f92, 0xa8281ed6, 0x967abbb3, 0x5a151440, 0x9952fbed, 0x13b41e44, + 0xafe609c3, 0xa2bca416, 0xf111821f, 0xfb1264b4, 0x91bac974, 0xd6c7d6ab, 0x8e48ff35, + 0x4419bd43, 0xc4a65665, 0x685e5510, 0x33554c36, 0xab498697, 0x0dbd21fe, 0x3cfe491d, + 0x982da466, 0xcbea4ca7, 0x9e110c7b, 0x79c56b8a, 0x5fc5a047, 0x84d80e2e, 0x1aa9f444, + 0x730f203c, 0x6a57b1ab, 0xd752f7a6, 0x87a7dc62, 0x944545ff, 0x40660460, 0x77c1a42f, + 0xc9ac375d, 0xe866d7ef, 0x744695f0, 0x81428c85, 0xa1fc6b96, 0xd7917c7b, 0x7bf03c19, + 0x5b33eb41, 0x5715f791, 0x8f6cae5f, 0xdb0708fd, 0xb125ac8e, 0x785ce6b7, 0x56c6815b, + 0x6f46eadb, 0x4eeebeee, 0x195355d8, 0xa244de3c, 0x9d7389c0, 0x53761abd, 0xcf99d019, + 0xde9ec24b, 0x0d76ce39, 0x70beb181, 0x2e55ecee, 0xd5f86079, 0xf56d9d4b, 0xfb8886fb, + 0x13ef5a83, 0x408f43c5, 0x3f3389a4, 0xfad37943, 0x58ccf45c, 0xf82df846, 0x415c7f3e, + 0x2915e818, 0x8b3d5cf4, 0x6a445f27, 0xf8dbb57a, 0xca8f0070, 0x8ad803ec, 0xb2e87c34, + 0x038f9245, 0xbedd8a6c, 0xc7c9dee0, 0x0eac7d56, 0x2ad3fa14, 0xe0de0840, 0xf775677c, + 0xf1bd0ad5, 0x92be221e, 0x87fa1fb9, 0xce9d04a4, 0xd2c36fa9, 0x3f6f7024, 0xb028af62, + 0x907855ee, 0xd83e49d6, 0x4efac5dc, 0xe7151aab, 0x77cd8c6b, 0x0a753b7d, 0x0af908b4, + 0x8c983623, 0xe50f3027, 0x94222771, 0x1d08e2d6, 0xf7e928e6, 0xf2ee5ca6, 0x1b61b93c, + 0x11eb962b, 0x9648b21c, 0xce2bcba1, 0x34f77154, 0x7bbebe30, 0xe526a319, 0x8ce329ac, + 0xde4a74d2, 0xb5dc53d5, 0x0009e8b3, + }}, + /* 10 ^ 4096 */ + {426, + { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x2a67c001, 0xd4724e8d, 0x8efe7ae7, 0xf89a1e90, 0xef084117, + 0x54e05154, 0x13b1bb51, 0x506be829, 0xfb29b172, 0xe599574e, 0xf0da6146, 0x806c0ed3, + 0xb86ae5be, 0x45155e93, 0xc0591cc2, 0x7e1e7c34, 0x7c4823da, 0x1d1f4cce, 0x9b8ba1e8, + 0xd6bfdf75, 0xe341be10, 0xc2dfae78, 0x016b67b2, 0x0f237f1a, 0x3dbeabcd, 0xaf6a2574, + 0xcab3e6d7, 0x142e0e80, 0x61959127, 0x2c234811, 0x87009701, 0xcb4bf982, 0xf8169c84, + 0x88052f8c, 0x68dde6d4, 0xbc131761, 0xff0b0905, 0x54ab9c41, 0x7613b224, 0x1a1c304e, + 0x3bfe167b, 0x441c2d47, 0x4f6cea9c, 0x78f06181, 0xeb659fb8, 0x30c7ae41, 0x947e0d0e, + 0xa1ebcad7, 0xd97d9556, 0x2130504d, 0x1a8309cb, 0xf2acd507, 0x3f8ec72a, 0xfd82373a, + 0x95a842bc, 0x280f4d32, 0xf3618ac0, 0x811a4f04, 0x6dc3a5b4, 0xd3967a1b, 0x15b8c898, + 0xdcfe388f, 0x454eb2a0, 0x8738b909, 0x10c4e996, 0x2bd9cc11, 0x3297cd0c, 0x655fec30, + 0xae0725b1, 0xf4090ee8, 0x037d19ee, 0x398c6fed, 0x3b9af26b, 0xc994a450, 0xb5341743, + 0x75a697b2, 0xac50b9c1, 0x3ccb5b92, 0xffe06205, 0xa8329761, 0xdfea5242, 0xeb83cadb, + 0xe79dadf7, 0x3c20ee69, 0x1e0a6817, 0x7021b97a, 0x743074fa, 0x176ca776, 0x77fb8af6, + 0xeca19beb, 0x92baf1de, 0xaf63b712, 0xde35c88b, 0xa4eb8f8c, 0xe137d5e9, 0x40b464a0, + 0x87d1cde8, 0x42923bbd, 0xcd8f62ff, 0x2e2690f3, 0x095edc16, 0x59c89f1b, 0x1fa8fd5d, + 0x5138753d, 0x390a2b29, 0x80152f18, 0x2dd8d925, 0xf984d83e, 0x7a872e74, 0xc19e1faf, + 0xed4d542d, 0xecf9b5d0, 0x9462ea75, 0xc53c0adf, 0x0caea134, 0x37a2d439, 0xc8fa2e8a, + 0x2181327e, 0x6e7bb827, 0x2d240820, 0x50be10e0, 0x5893d4b8, 0xab312bb9, 0x1f2b2322, + 0x440b3f25, 0xbf627ede, 0x72dac789, 0xb608b895, 0x78787e2a, 0x86deb3f0, 0x6fee7aab, + 0xbb9373f4, 0x27ecf57b, 0xf7d8b57e, 0xfca26a9f, 0x3d04e8d2, 0xc9df13cb, 0x3172826a, + 0xcd9e8d7c, 0xa8fcd8e0, 0xb2c39497, 0x307641d9, 0x1cc939c1, 0x2608c4cf, 0xb6d1c7bf, + 0x3d326a7e, 0xeeaf19e6, 0x8e13e25f, 0xee63302b, 0x2dfe6d97, 0x25971d58, 0xe41d3cc4, + 0x0a80627c, 0xab8db59a, 0x9eea37c8, 0xe90afb77, 0x90ca19cf, 0x9ee3352c, 0x3613c850, + 0xfe78d682, 0x788f6e50, 0x5b060904, 0xb71bd1a4, 0x3fecb534, 0xb32c450c, 0x20c33857, + 0xa6e9cfda, 0x0239f4ce, 0x48497187, 0xa19adb95, 0xb492ed8a, 0x95aca6a8, 0x4dcd6cd9, + 0xcf1b2350, 0xfbe8b12a, 0x1a67778c, 0x38eb3acc, 0xc32da383, 0xfb126ab1, 0xa03f40a8, + 0xed5bf546, 0xe9ce4724, 0x4c4a74fd, 0x73a130d8, 0xd9960e2d, 0xa2ebd6c1, 0x94ab6feb, + 0x6f233b7c, 0x49126080, 0x8e7b9a73, 0x4b8c9091, 0xd298f999, 0x35e836b5, 0xa96ddeff, + 0x96119b31, 0x6b0dd9bc, 0xc6cc3f8d, 0x282566fb, 0x72b882e7, 0xd6769f3b, 0xa674343d, + 0x00fc509b, 0xdcbf7789, 0xd6266a3f, 0xae9641fd, 0x4e89541b, 0x11953407, 0x53400d03, + 0x8e0dd75a, 0xe5b53345, 0x108f19ad, 0x108b89bc, 0x41a4c954, 0xe03b2b63, 0x437b3d7f, + 0x97aced8e, 0xcbd66670, 0x2c5508c2, 0x650ebc69, 0x5c4f2ef0, 0x904ff6bf, 0x9985a2df, + 0x9faddd9e, 0x5ed8d239, 0x25585832, 0xe3e51cb9, 0x0ff4f1d4, 0x56c02d9a, 0x8c4ef804, + 0xc1a08a13, 0x13fd01c8, 0xe6d27671, 0xa7c234f4, 
0x9d0176cc, 0xd0d73df2, 0x4d8bfa89, + 0x544f10cd, 0x2b17e0b2, 0xb70a5c7d, 0xfd86fe49, 0xdf373f41, 0x214495bb, 0x84e857fd, + 0x00d313d5, 0x0496fcbe, 0xa4ba4744, 0xe8cac982, 0xaec29e6e, 0x87ec7038, 0x7000a519, + 0xaeee333b, 0xff66e42c, 0x8afd6b25, 0x03b4f63b, 0xbd7991dc, 0x5ab8d9c7, 0x2ed4684e, + 0x48741a6c, 0xaf06940d, 0x2fdc6349, 0xb03d7ecd, 0xe974996f, 0xac7867f9, 0x52ec8721, + 0xbcdd9d4a, 0x8edd2d00, 0x3557de06, 0x41c759f8, 0x3956d4b9, 0xa75409f2, 0x123cd8a1, + 0xb6100fab, 0x3e7b21e2, 0x2e8d623b, 0x92959da2, 0xbca35f77, 0x200c03a5, 0x35fcb457, + 0x1bb6c6e4, 0xf74eb928, 0x3d5d0b54, 0x87cc1d21, 0x4964046f, 0x18ae4240, 0xd868b275, + 0x8bd2b496, 0x1c5563f4, 0xc234d8f5, 0xf868e970, 0xf9151fff, 0xae7be4a2, 0x271133ee, + 0xbb0fd922, 0x25254932, 0xa60a9fc0, 0x104bcd64, 0x30290145, 0x00000062, + }}, +}; + +static int +BigInt_IsZero(const BigInt *i) +{ + return i->length == 0; +} + +/* + * Returns 1 if the value is even + */ +static int +BigInt_IsEven(const BigInt *i) +{ + return (i->length == 0) || ((i->blocks[0] % 2) == 0); +} + +static void +BigInt_Copy(BigInt *dst, const BigInt *src) +{ + npy_uint32 length = src->length; + npy_uint32 *dstp = dst->blocks; + const npy_uint32 *srcp; + for (srcp = src->blocks; srcp != src->blocks + length; ++dstp, ++srcp) { + *dstp = *srcp; + } + dst->length = length; +} + +/* result = result << shift */ +static void +BigInt_ShiftLeft(BigInt *result, npy_uint32 shift) +{ + npy_uint32 shiftBlocks = shift / 32; + npy_uint32 shiftBits = shift % 32; + + /* process blocks high to low so that we can safely process in place */ + const npy_uint32 *pInBlocks = result->blocks; + npy_int32 inLength = result->length; + npy_uint32 *pInCur, *pOutCur; + + DEBUG_ASSERT(inLength + shiftBlocks < c_BigInt_MaxBlocks); + DEBUG_ASSERT(shift != 0); + + /* check if the shift is block aligned */ + if (shiftBits == 0) { + npy_uint32 i; + + /* copy blocks from high to low */ + for (pInCur = result->blocks + result->length, pOutCur = pInCur + shiftBlocks; + pInCur >= pInBlocks; --pInCur, --pOutCur) { + *pOutCur = *pInCur; + } + + /* zero the remaining low blocks */ + for (i = 0; i < shiftBlocks; ++i) { + result->blocks[i] = 0; + } + + result->length += shiftBlocks; + } + /* else we need to shift partial blocks */ + else { + npy_uint32 i; + npy_int32 inBlockIdx = inLength - 1; + npy_uint32 outBlockIdx = inLength + shiftBlocks; + + /* output the initial blocks */ + const npy_uint32 lowBitsShift = (32 - shiftBits); + npy_uint32 highBits = 0; + npy_uint32 block = result->blocks[inBlockIdx]; + npy_uint32 lowBits = block >> lowBitsShift; + + /* set the length to hold the shifted blocks */ + DEBUG_ASSERT(outBlockIdx < c_BigInt_MaxBlocks); + result->length = outBlockIdx + 1; + + while (inBlockIdx > 0) { + result->blocks[outBlockIdx] = highBits | lowBits; + highBits = block << shiftBits; + + --inBlockIdx; + --outBlockIdx; + + block = result->blocks[inBlockIdx]; + lowBits = block >> lowBitsShift; + } + + /* output the final blocks */ + DEBUG_ASSERT(outBlockIdx == shiftBlocks + 1); + result->blocks[outBlockIdx] = highBits | lowBits; + result->blocks[outBlockIdx - 1] = block << shiftBits; + + /* zero the remaining low blocks */ + for (i = 0; i < shiftBlocks; ++i) { + result->blocks[i] = 0; + } + + /* check if the terminating block has no set bits */ + if (result->blocks[result->length - 1] == 0) { + --result->length; + } + } +} + +static void +BigInt_Set_uint32(BigInt *i, npy_uint32 val) +{ + if (val != 0) { + i->blocks[0] = val; + i->length = 1; + } + else { + i->length = 0; + } +} + +/* result = 
2^exponent */ +static inline void +BigInt_Pow2(BigInt *result, npy_uint32 exponent) +{ + npy_uint32 bitIdx; + npy_uint32 blockIdx = exponent / 32; + npy_uint32 i; + + DEBUG_ASSERT(blockIdx < c_BigInt_MaxBlocks); + + for (i = 0; i <= blockIdx; ++i) { + result->blocks[i] = 0; + } + + result->length = blockIdx + 1; + + bitIdx = (exponent % 32); + result->blocks[blockIdx] |= ((npy_uint32)1 << bitIdx); +} + +static void +BigInt_Set_2x_uint64(BigInt *i, npy_uint64 hi, npy_uint64 lo) +{ + if (hi > bitmask_u64(32)) { + i->length = 4; + } + else if (hi != 0) { + i->length = 3; + } + else if (lo > bitmask_u64(32)) { + i->length = 2; + } + else if (lo != 0) { + i->length = 1; + } + else { + i->length = 0; + } + + /* Note deliberate fallthrough in this switch */ + switch (i->length) { + case 4: + i->blocks[3] = (hi >> 32) & bitmask_u64(32); + case 3: + i->blocks[2] = hi & bitmask_u64(32); + case 2: + i->blocks[1] = (lo >> 32) & bitmask_u64(32); + case 1: + i->blocks[0] = lo & bitmask_u64(32); + } +} + +/* result = lhs * rhs */ +static void +BigInt_Multiply_int(BigInt *result, const BigInt *lhs, npy_uint32 rhs) +{ + /* perform long multiplication */ + npy_uint32 carry = 0; + npy_uint32 *resultCur = result->blocks; + const npy_uint32 *pLhsCur = lhs->blocks; + const npy_uint32 *pLhsEnd = lhs->blocks + lhs->length; + for (; pLhsCur != pLhsEnd; ++pLhsCur, ++resultCur) { + npy_uint64 product = (npy_uint64)(*pLhsCur) * rhs + carry; + *resultCur = (npy_uint32)(product & bitmask_u64(32)); + carry = product >> 32; + } + + /* if there is a remaining carry, grow the array */ + if (carry != 0) { + /* grow the array */ + DEBUG_ASSERT(lhs->length + 1 <= c_BigInt_MaxBlocks); + *resultCur = (npy_uint32)carry; + result->length = lhs->length + 1; + } + else { + result->length = lhs->length; + } +} + +/* + * result = lhs * rhs + */ +static void +BigInt_Multiply(BigInt *result, const BigInt *lhs, const BigInt *rhs) +{ + const BigInt *large; + const BigInt *small; + npy_uint32 maxResultLen; + npy_uint32 *cur, *end, *resultStart; + const npy_uint32 *smallCur; + + DEBUG_ASSERT(result != lhs && result != rhs); + + /* determine which operand has the smaller length */ + if (lhs->length < rhs->length) { + small = lhs; + large = rhs; + } + else { + small = rhs; + large = lhs; + } + + /* set the maximum possible result length */ + maxResultLen = large->length + small->length; + DEBUG_ASSERT(maxResultLen <= c_BigInt_MaxBlocks); + + /* clear the result data */ + for (cur = result->blocks, end = cur + maxResultLen; cur != end; ++cur) { + *cur = 0; + } + + /* perform standard long multiplication for each small block */ + resultStart = result->blocks; + for (smallCur = small->blocks; smallCur != small->blocks + small->length; + ++smallCur, ++resultStart) { + /* + * if non-zero, multiply against all the large blocks and add into the + * result + */ + const npy_uint32 multiplier = *smallCur; + if (multiplier != 0) { + const npy_uint32 *largeCur = large->blocks; + npy_uint32 *resultCur = resultStart; + npy_uint64 carry = 0; + do { + npy_uint64 product = (*resultCur) + (*largeCur) * (npy_uint64)multiplier + carry; + carry = product >> 32; + *resultCur = product & bitmask_u64(32); + ++largeCur; + ++resultCur; + } while (largeCur != large->blocks + large->length); + + DEBUG_ASSERT(resultCur < result->blocks + maxResultLen); + *resultCur = (npy_uint32)(carry & bitmask_u64(32)); + } + } + + /* check if the terminating block has no set bits */ + if (maxResultLen > 0 && result->blocks[maxResultLen - 1] == 0) { + result->length = maxResultLen - 
1;
+    }
+    else {
+        result->length = maxResultLen;
+    }
+}
+
+/* in = in * 10^exponent */
+static void
+BigInt_MultiplyPow10(BigInt *in, npy_uint32 exponent, BigInt *temp)
+{
+    /* use two temporary values to reduce large integer copy operations */
+    BigInt *curTemp, *pNextTemp;
+    npy_uint32 smallExponent;
+    npy_uint32 tableIdx = 0;
+
+    /* make sure the exponent is within the bounds of the lookup table data */
+    DEBUG_ASSERT(exponent < 8192);
+
+    /*
+     * initialize the result by looking up a 32-bit power of 10 corresponding to
+     * the first 3 bits
+     */
+    smallExponent = exponent & bitmask_u32(3);
+    if (smallExponent != 0) {
+        BigInt_Multiply_int(temp, in, g_PowerOf10_U32[smallExponent]);
+        curTemp = temp;
+        pNextTemp = in;
+    }
+    else {
+        curTemp = in;
+        pNextTemp = temp;
+    }
+
+    /* remove the low bits that we used for the 32-bit lookup table */
+    exponent >>= 3;
+
+    /* while there are remaining bits in the exponent to be processed */
+    while (exponent != 0) {
+        /* if the current bit is set, multiply by this power of 10 */
+        if (exponent & 1) {
+            BigInt *pSwap;
+
+            /* multiply into the next temporary */
+            BigInt_Multiply(pNextTemp, curTemp, &g_PowerOf10_Big[tableIdx]);
+
+            /* swap to the next temporary */
+            pSwap = curTemp;
+            curTemp = pNextTemp;
+            pNextTemp = pSwap;
+        }
+
+        /* advance to the next bit */
+        ++tableIdx;
+        exponent >>= 1;
+    }
+
+    /* output the result */
+    if (curTemp != in) {
+        BigInt_Copy(in, curTemp);
+    }
+}
+
+/* result = 10^exponent */
+static void
+BigInt_Pow10(BigInt *result, npy_uint32 exponent, BigInt *temp)
+{
+    /* use two temporary values to reduce large integer copy operations */
+    BigInt *curTemp = result;
+    BigInt *pNextTemp = temp;
+    npy_uint32 smallExponent;
+    npy_uint32 tableIdx = 0;
+
+    /* make sure the exponent is within the bounds of the lookup table data */
+    DEBUG_ASSERT(exponent < 8192);
+
+    /*
+     * initialize the result by looking up a 32-bit power of 10 corresponding to
+     * the first 3 bits
+     */
+    smallExponent = exponent & bitmask_u32(3);
+    BigInt_Set_uint32(curTemp, g_PowerOf10_U32[smallExponent]);
+
+    /* remove the low bits that we used for the 32-bit lookup table */
+    exponent >>= 3;
+
+    /* while there are remaining bits in the exponent to be processed */
+    while (exponent != 0) {
+        /* if the current bit is set, multiply by this power of 10 */
+        if (exponent & 1) {
+            BigInt *pSwap;
+
+            /* multiply into the next temporary */
+            BigInt_Multiply(pNextTemp, curTemp, &g_PowerOf10_Big[tableIdx]);
+
+            /* swap to the next temporary */
+            pSwap = curTemp;
+            curTemp = pNextTemp;
+            pNextTemp = pSwap;
+        }
+
+        /* advance to the next bit */
+        ++tableIdx;
+        exponent >>= 1;
+    }
+
+    /* output the result */
+    if (curTemp != result) {
+        BigInt_Copy(result, curTemp);
+    }
+}
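`BigInt_MultiplyPow10` and `BigInt_Pow10` above assemble 10^e in O(log e) big-integer multiplications: the low three bits of the exponent are served by the 32-bit table `g_PowerOf10_U32` (10^0..10^7), and each remaining set bit k selects the precomputed entry `g_PowerOf10_Big[k]` = 10^(2^(k+3)), i.e. 10^8, 10^16, 10^32, and so on. The same scheme in miniature, with `double` standing in for the BigInt so it runs standalone:

```cpp
// Sketch of the table-driven square-and-multiply used by BigInt_MultiplyPow10.
#include <cstdint>
#include <cstdio>

static const double pow10_small[8] = {1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7};

static double pow10_demo(uint32_t e)
{
    double result = pow10_small[e & 7];  // low 3 bits: direct table lookup
    double big = 1e8;                    // 10^(2^3), the first "big" entry
    for (e >>= 3; e != 0; e >>= 1) {
        if (e & 1) {
            result *= big;  // fold in 10^(2^(k+3)) when bit k is set
        }
        big *= big;  // advancing the table index doubles the exponent
    }
    return result;
}

int main()
{
    std::printf("%g\n", pow10_demo(19));  // prints 1e+19
    return 0;
}
```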
+/* result = lhs + rhs */
+static void
+BigInt_Add(BigInt *result, const BigInt *lhs, const BigInt *rhs)
+{
+    /* determine which operand has the smaller length */
+    const BigInt *large, *small;
+    npy_uint64 carry = 0;
+    const npy_uint32 *largeCur, *smallCur, *largeEnd, *smallEnd;
+    npy_uint32 *resultCur;
+
+    if (lhs->length < rhs->length) {
+        small = lhs;
+        large = rhs;
+    }
+    else {
+        small = rhs;
+        large = lhs;
+    }
+
+    /* The output will be at least as long as the largest input */
+    result->length = large->length;
+
+    /* Add each block and carry the overflow to the next block */
+    largeCur = large->blocks;
+    largeEnd = largeCur + large->length;
+    smallCur = small->blocks;
+    smallEnd = smallCur + small->length;
+    resultCur = result->blocks;
+    while (smallCur != smallEnd) {
+        npy_uint64 sum = carry + (npy_uint64)(*largeCur) + (npy_uint64)(*smallCur);
+        carry = sum >> 32;
+        *resultCur = sum & bitmask_u64(32);
+        ++largeCur;
+        ++smallCur;
+        ++resultCur;
+    }
+
+    /* Add the carry to any blocks that only exist in the large operand */
+    while (largeCur != largeEnd) {
+        npy_uint64 sum = carry + (npy_uint64)(*largeCur);
+        carry = sum >> 32;
+        (*resultCur) = sum & bitmask_u64(32);
+        ++largeCur;
+        ++resultCur;
+    }
+
+    /* If there's still a carry, append a new block */
+    if (carry != 0) {
+        DEBUG_ASSERT(carry == 1);
+        DEBUG_ASSERT((npy_uint32)(resultCur - result->blocks) == large->length &&
+                     (large->length < c_BigInt_MaxBlocks));
+        *resultCur = 1;
+        result->length = large->length + 1;
+    }
+    else {
+        result->length = large->length;
+    }
+}
+
+/* result = in * 2 */
+static void
+BigInt_Multiply2(BigInt *result, const BigInt *in)
+{
+    /* shift all the blocks by one */
+    npy_uint32 carry = 0;
+
+    npy_uint32 *resultCur = result->blocks;
+    const npy_uint32 *pLhsCur = in->blocks;
+    const npy_uint32 *pLhsEnd = in->blocks + in->length;
+    for (; pLhsCur != pLhsEnd; ++pLhsCur, ++resultCur) {
+        npy_uint32 cur = *pLhsCur;
+        *resultCur = (cur << 1) | carry;
+        carry = cur >> 31;
+    }
+
+    if (carry != 0) {
+        /* grow the array */
+        DEBUG_ASSERT(in->length + 1 <= c_BigInt_MaxBlocks);
+        *resultCur = carry;
+        result->length = in->length + 1;
+    }
+    else {
+        result->length = in->length;
+    }
+}
+
+/* result = result * 2 */
+static void
+BigInt_Multiply2_inplace(BigInt *result)
+{
+    /* shift all the blocks by one */
+    npy_uint32 carry = 0;
+
+    npy_uint32 *cur = result->blocks;
+    npy_uint32 *end = result->blocks + result->length;
+    for (; cur != end; ++cur) {
+        npy_uint32 tmpcur = *cur;
+        *cur = (tmpcur << 1) | carry;
+        carry = tmpcur >> 31;
+    }
+
+    if (carry != 0) {
+        /* grow the array */
+        DEBUG_ASSERT(result->length + 1 <= c_BigInt_MaxBlocks);
+        *cur = carry;
+        ++result->length;
+    }
+}
+
+static npy_int32
+BigInt_Compare(const BigInt *lhs, const BigInt *rhs)
+{
+    int i;
+
+    /* A bigger length implies a bigger number. */
+    npy_int32 lengthDiff = lhs->length - rhs->length;
+    if (lengthDiff != 0) {
+        return lengthDiff;
+    }
+
+    /* Compare blocks one by one from high to low. */
+    for (i = lhs->length - 1; i >= 0; --i) {
+        if (lhs->blocks[i] == rhs->blocks[i]) {
+            continue;
+        }
+        else if (lhs->blocks[i] > rhs->blocks[i]) {
+            return 1;
+        }
+        else {
+            return -1;
+        }
+    }
+
+    /* no blocks differed */
+    return 0;
+}
+
+static npy_uint32
+BigInt_DivideWithRemainder_MaxQuotient9(BigInt *dividend, const BigInt *divisor)
+{
+    npy_uint32 length, quotient;
+    const npy_uint32 *finalDivisorBlock;
+    npy_uint32 *finalDividendBlock;
+
+    /*
+     * Check that the divisor has been correctly shifted into range and that it
+     * is not smaller than the dividend in length.
+     */
+    DEBUG_ASSERT(divisor->length > 0 && divisor->blocks[divisor->length - 1] >= 8 &&
+                 divisor->blocks[divisor->length - 1] < bitmask_u64(32) &&
+                 dividend->length <= divisor->length);
+
+    /*
+     * If the dividend is smaller than the divisor, the quotient is zero and the
+     * dividend is already the remainder.
+     */
+    length = divisor->length;
+    if (dividend->length < divisor->length) {
+        return 0;
+    }
+
+    finalDivisorBlock = divisor->blocks + length - 1;
+    finalDividendBlock = dividend->blocks + length - 1;
+
+    /*
+     * Compute an estimated quotient based on the high block value. This will
+     * either match the actual quotient or undershoot by one.
+ */ + quotient = *finalDividendBlock / (*finalDivisorBlock + 1); + DEBUG_ASSERT(quotient <= 9); + + /* Divide out the estimated quotient */ + if (quotient != 0) { + /* dividend = dividend - divisor*quotient */ + const npy_uint32 *divisorCur = divisor->blocks; + npy_uint32 *dividendCur = dividend->blocks; + + npy_uint64 borrow = 0; + npy_uint64 carry = 0; + do { + npy_uint64 difference, product; + + product = (npy_uint64)*divisorCur * (npy_uint64)quotient + carry; + carry = product >> 32; + + difference = (npy_uint64)*dividendCur - (product & bitmask_u64(32)) - borrow; + borrow = (difference >> 32) & 1; + + *dividendCur = difference & bitmask_u64(32); + + ++divisorCur; + ++dividendCur; + } while (divisorCur <= finalDivisorBlock); + + /* remove all leading zero blocks from dividend */ + while (length > 0 && dividend->blocks[length - 1] == 0) { + --length; + } + + dividend->length = length; + } + + /* + * If the dividend is still larger than the divisor, we overshot our + * estimate quotient. To correct, we increment the quotient and subtract one + * more divisor from the dividend. + */ + if (BigInt_Compare(dividend, divisor) >= 0) { + /* dividend = dividend - divisor */ + const npy_uint32 *divisorCur = divisor->blocks; + npy_uint32 *dividendCur = dividend->blocks; + npy_uint64 borrow = 0; + + ++quotient; + + do { + npy_uint64 difference = (npy_uint64)*dividendCur - (npy_uint64)*divisorCur - borrow; + borrow = (difference >> 32) & 1; + + *dividendCur = difference & bitmask_u64(32); + + ++divisorCur; + ++dividendCur; + } while (divisorCur <= finalDivisorBlock); + + /* remove all leading zero blocks from dividend */ + while (length > 0 && dividend->blocks[length - 1] == 0) { + --length; + } + + dividend->length = length; + } + + return quotient; +} + +static npy_uint32 +LogBase2_32(npy_uint32 val) +{ + static const npy_uint8 logTable[256] = { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}; + + npy_uint32 temp; + + temp = val >> 24; + if (temp) { + return 24 + logTable[temp]; + } + + temp = val >> 16; + if (temp) { + return 16 + logTable[temp]; + } + + temp = val >> 8; + if (temp) { + return 8 + logTable[temp]; + } + + return logTable[val]; +} + +static npy_uint32 +LogBase2_64(npy_uint64 val) +{ + npy_uint64 temp; + + temp = val >> 32; + if (temp) { + return 32 + LogBase2_32((npy_uint32)temp); + } + + return LogBase2_32((npy_uint32)val); +} + +static npy_uint32 +LogBase2_128(npy_uint64 hi, npy_uint64 lo) +{ + if (hi) { + return 64 + LogBase2_64(hi); + } + + return LogBase2_64(lo); +} + +static npy_uint32 +PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, npy_uint32 mantissaHexWidth, + char signbit) +{ + npy_uint32 maxPrintLen = bufferSize - 1; + npy_uint32 pos = 0; + + DEBUG_ASSERT(bufferSize > 0); + + /* Check for infinity */ + if (mantissa == 0) { 
+ npy_uint32 printLen; + + /* only print sign for inf values (though nan can have a sign set) */ + if (signbit == '+') { + if (pos < maxPrintLen - 1) { + buffer[pos++] = '+'; + } + } + else if (signbit == '-') { + if (pos < maxPrintLen - 1) { + buffer[pos++] = '-'; + } + } + + /* copy and make sure the buffer is terminated */ + printLen = (3 < maxPrintLen - pos) ? 3 : maxPrintLen - pos; + memcpy(buffer + pos, "inf", printLen); + buffer[pos + printLen] = '\0'; + return pos + printLen; + } + else { + /* copy and make sure the buffer is terminated */ + npy_uint32 printLen = (3 < maxPrintLen - pos) ? 3 : maxPrintLen - pos; + memcpy(buffer + pos, "nan", printLen); + buffer[pos + printLen] = '\0'; + + /* + * For numpy we ignore unusual mantissa values for nan, but keep this + * code in case we change our mind later. + * + * // append HEX value + * if (maxPrintLen > 3) { + * printLen += PrintHex(buffer+3, bufferSize-3, mantissa, + * mantissaHexWidth); + * } + */ + + return pos + printLen; + } +} + +static npy_uint32 +Dragon4(BigInt *bigints, const npy_int32 exponent, const npy_uint32 mantissaBit, + const npy_bool hasUnequalMargins, const DigitMode digitMode, const CutoffMode cutoffMode, + npy_int32 cutoff_max, npy_int32 cutoff_min, char *pOutBuffer, npy_uint32 bufferSize, + npy_int32 *pOutExponent) +{ + char *curDigit = pOutBuffer; + + /* + * We compute values in integer format by rescaling as + * mantissa = scaledValue / scale + * marginLow = scaledMarginLow / scale + * marginHigh = scaledMarginHigh / scale + * Here, marginLow and marginHigh represent 1/2 of the distance to the next + * floating point value above/below the mantissa. + * + * scaledMarginHigh will point to scaledMarginLow in the case they must be + * equal to each other, otherwise it will point to optionalMarginHigh. + */ + BigInt *mantissa = &bigints[0]; /* the only initialized bigint */ + BigInt *scale = &bigints[1]; + BigInt *scaledValue = &bigints[2]; + BigInt *scaledMarginLow = &bigints[3]; + BigInt *scaledMarginHigh; + BigInt *optionalMarginHigh = &bigints[4]; + + BigInt *temp1 = &bigints[5]; + BigInt *temp2 = &bigints[6]; + + const npy_float64 log10_2 = 0.30102999566398119521373889472449; + npy_int32 digitExponent, hiBlock; + npy_int32 cutoff_max_Exponent, cutoff_min_Exponent; + npy_uint32 outputDigit; /* current digit being output */ + npy_uint32 outputLen; + npy_bool isEven = BigInt_IsEven(mantissa); + npy_int32 cmp; + + /* values used to determine how to round */ + npy_bool low, high, roundDown; + + DEBUG_ASSERT(bufferSize > 0); + + /* if the mantissa is zero, the value is zero regardless of the exponent */ + if (BigInt_IsZero(mantissa)) { + *curDigit = '0'; + *pOutExponent = 0; + return 1; + } + + BigInt_Copy(scaledValue, mantissa); + + if (hasUnequalMargins) { + /* if we have no fractional component */ + if (exponent > 0) { + /* + * 1) Expand the input value by multiplying out the mantissa and + * exponent. This represents the input value in its whole number + * representation. + * 2) Apply an additional scale of 2 such that later comparisons + * against the margin values are simplified. + * 3) Set the margin value to the lowest mantissa bit's scale. 
+ */ + + /* scaledValue = 2 * 2 * mantissa*2^exponent */ + BigInt_ShiftLeft(scaledValue, exponent + 2); + /* scale = 2 * 2 * 1 */ + BigInt_Set_uint32(scale, 4); + /* scaledMarginLow = 2 * 2^(exponent-1) */ + BigInt_Pow2(scaledMarginLow, exponent); + /* scaledMarginHigh = 2 * 2 * 2^(exponent-1) */ + BigInt_Pow2(optionalMarginHigh, exponent + 1); + } + /* else we have a fractional exponent */ + else { + /* + * In order to track the mantissa data as an integer, we store it as + * is with a large scale + */ + + /* scaledValue = 2 * 2 * mantissa */ + BigInt_ShiftLeft(scaledValue, 2); + /* scale = 2 * 2 * 2^(-exponent) */ + BigInt_Pow2(scale, -exponent + 2); + /* scaledMarginLow = 2 * 2^(-1) */ + BigInt_Set_uint32(scaledMarginLow, 1); + /* scaledMarginHigh = 2 * 2 * 2^(-1) */ + BigInt_Set_uint32(optionalMarginHigh, 2); + } + + /* the high and low margins are different */ + scaledMarginHigh = optionalMarginHigh; + } + else { + /* if we have no fractional component */ + if (exponent > 0) { + /* scaledValue = 2 * mantissa*2^exponent */ + BigInt_ShiftLeft(scaledValue, exponent + 1); + /* scale = 2 * 1 */ + BigInt_Set_uint32(scale, 2); + /* scaledMarginLow = 2 * 2^(exponent-1) */ + BigInt_Pow2(scaledMarginLow, exponent); + } + /* else we have a fractional exponent */ + else { + /* + * In order to track the mantissa data as an integer, we store it as + * is with a large scale + */ + + /* scaledValue = 2 * mantissa */ + BigInt_ShiftLeft(scaledValue, 1); + /* scale = 2 * 2^(-exponent) */ + BigInt_Pow2(scale, -exponent + 1); + /* scaledMarginLow = 2 * 2^(-1) */ + BigInt_Set_uint32(scaledMarginLow, 1); + } + + /* the high and low margins are equal */ + scaledMarginHigh = scaledMarginLow; + } + + /* + * Compute an estimate for digitExponent that will be correct or undershoot + * by one. This optimization is based on the paper "Printing Floating-Point + * Numbers Quickly and Accurately" by Burger and Dybvig + * https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.72.4656 + * We perform an additional subtraction of 0.69 to increase the frequency of + * a failed estimate because that lets us take a faster branch in the code. + * 0.69 is chosen because 0.69 + log10(2) is less than one by a reasonable + * epsilon that will account for any floating point error. + * + * We want to set digitExponent to floor(log10(v)) + 1 + * v = mantissa*2^exponent + * log2(v) = log2(mantissa) + exponent; + * log10(v) = log2(v) * log10(2) + * floor(log2(v)) = mantissaBit + exponent; + * log10(v) - log10(2) < (mantissaBit + exponent) * log10(2) <= log10(v) + * log10(v) < (mantissaBit + exponent) * log10(2) + log10(2) + * <= log10(v) + log10(2) + * floor(log10(v)) < ceil((mantissaBit + exponent) * log10(2)) + * <= floor(log10(v)) + 1 + * + * Warning: This calculation assumes npy_float64 is an IEEE-binary64 + * float. This line may need to be updated if this is not the case. + */ + digitExponent = + (npy_int32)(ceil((npy_float64)((npy_int32)mantissaBit + exponent) * log10_2 - 0.69)); + + /* + * if the digit exponent is smaller than the smallest desired digit for + * fractional cutoff, pull the digit back into legal range at which point we + * will round to the appropriate value. Note that while our value for + * digitExponent is still an estimate, this is safe because it only + * increases the number. This will either correct digitExponent to an + * accurate value or it will clamp it above the accurate value. 
+ */ + if (cutoff_max >= 0 && cutoffMode == CutoffMode_FractionLength && + digitExponent <= -cutoff_max) { + digitExponent = -cutoff_max + 1; + } + + /* Divide value by 10^digitExponent. */ + if (digitExponent > 0) { + /* A positive exponent creates a division so we multiply the scale. */ + BigInt_MultiplyPow10(scale, digitExponent, temp1); + } + else if (digitExponent < 0) { + /* + * A negative exponent creates a multiplication so we multiply up the + * scaledValue, scaledMarginLow and scaledMarginHigh. + */ + BigInt *temp = temp1, *pow10 = temp2; + BigInt_Pow10(pow10, -digitExponent, temp); + + BigInt_Multiply(temp, scaledValue, pow10); + BigInt_Copy(scaledValue, temp); + + BigInt_Multiply(temp, scaledMarginLow, pow10); + BigInt_Copy(scaledMarginLow, temp); + + if (scaledMarginHigh != scaledMarginLow) { + BigInt_Multiply2(scaledMarginHigh, scaledMarginLow); + } + } + + /* If (value >= 1), our estimate for digitExponent was too low */ + if (BigInt_Compare(scaledValue, scale) >= 0) { + /* + * The exponent estimate was incorrect. + * Increment the exponent and don't perform the premultiply needed + * for the first loop iteration. + */ + digitExponent = digitExponent + 1; + } + else { + /* + * The exponent estimate was correct. + * Multiply larger by the output base to prepare for the first loop + * iteration. + */ + BigInt_Multiply10(scaledValue); + BigInt_Multiply10(scaledMarginLow); + if (scaledMarginHigh != scaledMarginLow) { + BigInt_Multiply2(scaledMarginHigh, scaledMarginLow); + } + } + + /* + * Compute the cutoff_max exponent (the exponent of the final digit to + * print). Default to the maximum size of the output buffer. + */ + cutoff_max_Exponent = digitExponent - bufferSize; + if (cutoff_max >= 0) { + npy_int32 desiredCutoffExponent; + + if (cutoffMode == CutoffMode_TotalLength) { + desiredCutoffExponent = digitExponent - cutoff_max; + if (desiredCutoffExponent > cutoff_max_Exponent) { + cutoff_max_Exponent = desiredCutoffExponent; + } + } + /* Otherwise it's CutoffMode_FractionLength. Print cutoff_max digits + * past the decimal point or until we reach the buffer size + */ + else { + desiredCutoffExponent = -cutoff_max; + if (desiredCutoffExponent > cutoff_max_Exponent) { + cutoff_max_Exponent = desiredCutoffExponent; + } + } + } + /* Also compute the cutoff_min exponent. */ + cutoff_min_Exponent = digitExponent; + if (cutoff_min >= 0) { + npy_int32 desiredCutoffExponent; + + if (cutoffMode == CutoffMode_TotalLength) { + desiredCutoffExponent = digitExponent - cutoff_min; + if (desiredCutoffExponent < cutoff_min_Exponent) { + cutoff_min_Exponent = desiredCutoffExponent; + } + } + else { + desiredCutoffExponent = -cutoff_min; + if (desiredCutoffExponent < cutoff_min_Exponent) { + cutoff_min_Exponent = desiredCutoffExponent; + } + } + } + + /* Output the exponent of the first digit we will print */ + *pOutExponent = digitExponent - 1; + + /* + * In preparation for calling BigInt_DivideWithRemainder_MaxQuotient9(), we + * need to scale up our values such that the highest block of the + * denominator is greater than or equal to 8. We also need to guarantee that + * the numerator can never have a length greater than the denominator after + * each loop iteration. This requires the highest block of the denominator + * to be less than or equal to 429496729 which is the highest number that + * can be multiplied by 10 without overflowing to a new block. 
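+     *
+     * (429496729 = floor((2^32 - 1) / 10), so 429496729 * 10 = 4294967290
+     * still fits in a single 32-bit block.)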
+ */ + DEBUG_ASSERT(scale->length > 0); + hiBlock = scale->blocks[scale->length - 1]; + if (hiBlock < 8 || hiBlock > 429496729) { + npy_uint32 hiBlockLog2, shift; + + /* + * Perform a bit shift on all values to get the highest block of the + * denominator into the range [8,429496729]. We are more likely to make + * accurate quotient estimations in + * BigInt_DivideWithRemainder_MaxQuotient9() with higher denominator + * values so we shift the denominator to place the highest bit at index + * 27 of the highest block. This is safe because (2^28 - 1) = 268435455 + * which is less than 429496729. This means that all values with a + * highest bit at index 27 are within range. + */ + hiBlockLog2 = LogBase2_32(hiBlock); + DEBUG_ASSERT(hiBlockLog2 < 3 || hiBlockLog2 > 27); + shift = (32 + 27 - hiBlockLog2) % 32; + + BigInt_ShiftLeft(scale, shift); + BigInt_ShiftLeft(scaledValue, shift); + BigInt_ShiftLeft(scaledMarginLow, shift); + if (scaledMarginHigh != scaledMarginLow) { + BigInt_Multiply2(scaledMarginHigh, scaledMarginLow); + } + } + + if (digitMode == DigitMode_Unique) { + /* + * For the unique cutoff mode, we will try to print until we have + * reached a level of precision that uniquely distinguishes this value + * from its neighbors. If we run out of space in the output buffer, we + * terminate early. + */ + for (;;) { + BigInt *scaledValueHigh = temp1; + + digitExponent = digitExponent - 1; + + /* divide out the scale to extract the digit */ + outputDigit = BigInt_DivideWithRemainder_MaxQuotient9(scaledValue, scale); + DEBUG_ASSERT(outputDigit < 10); + + /* update the high end of the value */ + BigInt_Add(scaledValueHigh, scaledValue, scaledMarginHigh); + + /* + * stop looping if we are far enough away from our neighboring + * values (and we have printed at least the requested minimum + * digits) or if we have reached the cutoff digit + */ + cmp = BigInt_Compare(scaledValue, scaledMarginLow); + low = isEven ? (cmp <= 0) : (cmp < 0); + cmp = BigInt_Compare(scaledValueHigh, scale); + high = isEven ? (cmp >= 0) : (cmp > 0); + if (((low | high) & (digitExponent <= cutoff_min_Exponent)) | + (digitExponent == cutoff_max_Exponent)) { + break; + } + + /* store the output digit */ + *curDigit = (char)('0' + outputDigit); + ++curDigit; + + /* multiply larger by the output base */ + BigInt_Multiply10(scaledValue); + BigInt_Multiply10(scaledMarginLow); + if (scaledMarginHigh != scaledMarginLow) { + BigInt_Multiply2(scaledMarginHigh, scaledMarginLow); + } + } + } + else { + /* + * For exact digit mode, we will try to print until we + * have exhausted all precision (i.e. all remaining digits are zeros) or + * until we reach the desired cutoff digit. + */ + low = NPY_FALSE; + high = NPY_FALSE; + + for (;;) { + digitExponent = digitExponent - 1; + + /* divide out the scale to extract the digit */ + outputDigit = BigInt_DivideWithRemainder_MaxQuotient9(scaledValue, scale); + DEBUG_ASSERT(outputDigit < 10); + + if ((scaledValue->length == 0) | (digitExponent == cutoff_max_Exponent)) { + break; + } + + /* store the output digit */ + *curDigit = (char)('0' + outputDigit); + ++curDigit; + + /* multiply larger by the output base */ + BigInt_Multiply10(scaledValue); + } + } + + /* default to rounding down the final digit if value got too close to 0 */ + roundDown = low; + + /* if it is legal to round up and down */ + if (low == high) { + npy_int32 compare; + + /* + * round to the closest digit by comparing value with 0.5. To do this we + * need to convert the inequality to large integer values. 
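+         * Each rewrite below multiplies both sides of the comparison by the
+         * same positive factor, so its outcome is unchanged: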
+ * compare( value, 0.5 ) + * compare( scale * value, scale * 0.5 ) + * compare( 2 * scale * value, scale ) + */ + BigInt_Multiply2_inplace(scaledValue); + compare = BigInt_Compare(scaledValue, scale); + roundDown = compare < 0; + + /* + * if we are directly in the middle, round towards the even digit (i.e. + * IEEE rounding rules) + */ + if (compare == 0) { + roundDown = (outputDigit & 1) == 0; + } + } + + /* print the rounded digit */ + if (roundDown) { + *curDigit = (char)('0' + outputDigit); + ++curDigit; + } + else { + /* handle rounding up */ + if (outputDigit == 9) { + /* find the first non-nine prior digit */ + for (;;) { + /* if we are at the first digit */ + if (curDigit == pOutBuffer) { + /* output 1 at the next highest exponent */ + *curDigit = '1'; + ++curDigit; + *pOutExponent += 1; + break; + } + + --curDigit; + if (*curDigit != '9') { + /* increment the digit */ + *curDigit += 1; + ++curDigit; + break; + } + } + } + else { + /* values in the range [0,8] can perform a simple round up */ + *curDigit = (char)('0' + outputDigit + 1); + ++curDigit; + } + } + + /* return the number of digits output */ + outputLen = (npy_uint32)(curDigit - pOutBuffer); + DEBUG_ASSERT(outputLen <= bufferSize); + return outputLen; +} + +static npy_uint32 +FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, npy_int32 exponent, + char signbit, npy_uint32 mantissaBit, npy_bool hasUnequalMargins, + DigitMode digit_mode, CutoffMode cutoff_mode, npy_int32 precision, + npy_int32 min_digits, TrimMode trim_mode, npy_int32 digits_left, + npy_int32 digits_right) +{ + npy_int32 printExponent; + npy_int32 numDigits, numWholeDigits = 0, has_sign = 0; + npy_int32 add_digits; + + npy_int32 maxPrintLen = (npy_int32)bufferSize - 1, pos = 0; + + /* track the # of digits past the decimal point that have been printed */ + npy_int32 numFractionDigits = 0, desiredFractionalDigits; + + DEBUG_ASSERT(bufferSize > 0); + + if (digit_mode != DigitMode_Unique) { + DEBUG_ASSERT(precision >= 0); + } + + if (signbit == '+' && pos < maxPrintLen) { + buffer[pos++] = '+'; + has_sign = 1; + } + else if (signbit == '-' && pos < maxPrintLen) { + buffer[pos++] = '-'; + has_sign = 1; + } + + numDigits = Dragon4(mantissa, exponent, mantissaBit, hasUnequalMargins, digit_mode, cutoff_mode, + precision, min_digits, buffer + has_sign, maxPrintLen - has_sign, + &printExponent); + + DEBUG_ASSERT(numDigits > 0); + DEBUG_ASSERT(numDigits <= bufferSize); + + /* if output has a whole number */ + if (printExponent >= 0) { + /* leave the whole number at the start of the buffer */ + numWholeDigits = printExponent + 1; + if (numDigits <= numWholeDigits) { + npy_int32 count = numWholeDigits - numDigits; + pos += numDigits; + + /* don't overflow the buffer */ + if (pos + count > maxPrintLen) { + count = maxPrintLen - pos; + } + + /* add trailing zeros up to the decimal point */ + numDigits += count; + for (; count > 0; count--) { + buffer[pos++] = '0'; + } + } + /* insert the decimal point prior to the fraction */ + else if (numDigits > numWholeDigits) { + npy_int32 maxFractionDigits; + + numFractionDigits = numDigits - numWholeDigits; + maxFractionDigits = maxPrintLen - numWholeDigits - 1 - pos; + if (numFractionDigits > maxFractionDigits) { + numFractionDigits = maxFractionDigits; + } + + memmove(buffer + pos + numWholeDigits + 1, buffer + pos + numWholeDigits, + numFractionDigits); + pos += numWholeDigits; + buffer[pos] = '.'; + numDigits = numWholeDigits + 1 + numFractionDigits; + pos += 1 + numFractionDigits; + } + } + else { + /* 
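+         * printExponent < 0 means the value has no whole digits (for
+         * example, 1.23e-3 must print as 0.00123), so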
shift out the fraction to make room for the leading zeros */ + npy_int32 numFractionZeros = 0; + if (pos + 2 < maxPrintLen) { + npy_int32 maxFractionZeros, digitsStartIdx, maxFractionDigits, i; + + maxFractionZeros = maxPrintLen - 2 - pos; + numFractionZeros = -(printExponent + 1); + if (numFractionZeros > maxFractionZeros) { + numFractionZeros = maxFractionZeros; + } + + digitsStartIdx = 2 + numFractionZeros; + + /* + * shift the significant digits right such that there is room for + * leading zeros + */ + numFractionDigits = numDigits; + maxFractionDigits = maxPrintLen - digitsStartIdx - pos; + if (numFractionDigits > maxFractionDigits) { + numFractionDigits = maxFractionDigits; + } + + memmove(buffer + pos + digitsStartIdx, buffer + pos, numFractionDigits); + + /* insert the leading zeros */ + for (i = 2; i < digitsStartIdx; ++i) { + buffer[pos + i] = '0'; + } + + /* update the counts */ + numFractionDigits += numFractionZeros; + numDigits = numFractionDigits; + } + + /* add the decimal point */ + if (pos + 1 < maxPrintLen) { + buffer[pos + 1] = '.'; + } + + /* add the initial zero */ + if (pos < maxPrintLen) { + buffer[pos] = '0'; + numDigits += 1; + } + numWholeDigits = 1; + pos += 2 + numFractionDigits; + } + + /* always add decimal point, except for DprZeros mode */ + if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 && pos < maxPrintLen) { + buffer[pos++] = '.'; + } + + add_digits = digit_mode == DigitMode_Unique ? min_digits : precision; + desiredFractionalDigits = add_digits < 0 ? 0 : add_digits; + if (cutoff_mode == CutoffMode_TotalLength) { + desiredFractionalDigits = add_digits - numWholeDigits; + } + + if (trim_mode == TrimMode_LeaveOneZero) { + /* if we didn't print any fractional digits, add a trailing 0 */ + if (numFractionDigits == 0 && pos < maxPrintLen) { + buffer[pos++] = '0'; + numFractionDigits++; + } + } + else if (trim_mode == TrimMode_None && desiredFractionalDigits > numFractionDigits && + pos < maxPrintLen) { + /* add trailing zeros up to add_digits length */ + /* compute the number of trailing zeros needed */ + npy_int32 count = desiredFractionalDigits - numFractionDigits; + if (pos + count > maxPrintLen) { + count = maxPrintLen - pos; + } + numFractionDigits += count; + + for (; count > 0; count--) { + buffer[pos++] = '0'; + } + } + /* else, for trim_mode Zeros or DptZeros, there is nothing more to add */ + + /* + * when rounding, we may still end up with trailing zeros. Remove them + * depending on trim settings. + */ + if (trim_mode != TrimMode_None && numFractionDigits > 0) { + while (buffer[pos - 1] == '0') { + pos--; + numFractionDigits--; + } + if (buffer[pos - 1] == '.') { + /* in TrimMode_LeaveOneZero, add trailing 0 back */ + if (trim_mode == TrimMode_LeaveOneZero) { + buffer[pos++] = '0'; + numFractionDigits++; + } + /* in TrimMode_DptZeros, remove trailing decimal point */ + else if (trim_mode == TrimMode_DptZeros) { + pos--; + } + } + } + + /* add any whitespace padding to right side */ + if (digits_right >= numFractionDigits) { + npy_int32 count = digits_right - numFractionDigits; + + /* in trim_mode DptZeros, if right padding, add a space for the . 
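+         * (the space is a stand-in that keeps padded, column-aligned output
+         * lined up with entries that kept their decimal point)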
*/ + if (trim_mode == TrimMode_DptZeros && numFractionDigits == 0 && pos < maxPrintLen) { + buffer[pos++] = ' '; + } + + if (pos + count > maxPrintLen) { + count = maxPrintLen - pos; + } + + for (; count > 0; count--) { + buffer[pos++] = ' '; + } + } + /* add any whitespace padding to left side */ + if (digits_left > numWholeDigits + has_sign) { + npy_int32 shift = digits_left - (numWholeDigits + has_sign); + npy_int32 count = pos; + + if (count + shift > maxPrintLen) { + count = maxPrintLen - shift; + } + + if (count > 0) { + memmove(buffer + shift, buffer, count); + } + pos = shift + count; + for (; shift > 0; shift--) { + buffer[shift - 1] = ' '; + } + } + + /* terminate the buffer */ + DEBUG_ASSERT(pos <= maxPrintLen); + buffer[pos] = '\0'; + + return pos; +} + +static npy_uint32 +FormatScientific(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, npy_int32 exponent, + char signbit, npy_uint32 mantissaBit, npy_bool hasUnequalMargins, + DigitMode digit_mode, npy_int32 precision, npy_int32 min_digits, + TrimMode trim_mode, npy_int32 digits_left, npy_int32 exp_digits) +{ + npy_int32 printExponent; + npy_int32 numDigits; + char *pCurOut; + npy_int32 numFractionDigits; + npy_int32 leftchars; + npy_int32 add_digits; + + if (digit_mode != DigitMode_Unique) { + DEBUG_ASSERT(precision >= 0); + } + + DEBUG_ASSERT(bufferSize > 0); + + pCurOut = buffer; + + /* add any whitespace padding to left side */ + leftchars = 1 + (signbit == '-' || signbit == '+'); + if (digits_left > leftchars) { + int i; + for (i = 0; i < digits_left - leftchars && bufferSize > 1; i++) { + *pCurOut = ' '; + pCurOut++; + --bufferSize; + } + } + + if (signbit == '+' && bufferSize > 1) { + *pCurOut = '+'; + pCurOut++; + --bufferSize; + } + else if (signbit == '-' && bufferSize > 1) { + *pCurOut = '-'; + pCurOut++; + --bufferSize; + } + + numDigits = Dragon4(mantissa, exponent, mantissaBit, hasUnequalMargins, digit_mode, + CutoffMode_TotalLength, precision < 0 ? -1 : precision + 1, + min_digits < 0 ? -1 : min_digits + 1, pCurOut, bufferSize, &printExponent); + + DEBUG_ASSERT(numDigits > 0); + DEBUG_ASSERT(numDigits <= bufferSize); + + /* keep the whole number as the first digit */ + if (bufferSize > 1) { + pCurOut += 1; + bufferSize -= 1; + } + + /* insert the decimal point prior to the fractional number */ + numFractionDigits = numDigits - 1; + if (numFractionDigits > 0 && bufferSize > 1) { + npy_int32 maxFractionDigits = (npy_int32)bufferSize - 2; + + if (numFractionDigits > maxFractionDigits) { + numFractionDigits = maxFractionDigits; + } + + memmove(pCurOut + 1, pCurOut, numFractionDigits); + pCurOut[0] = '.'; + pCurOut += (1 + numFractionDigits); + bufferSize -= (1 + numFractionDigits); + } + + /* always add decimal point, except for DprZeros mode */ + if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 && bufferSize > 1) { + *pCurOut = '.'; + ++pCurOut; + --bufferSize; + } + + add_digits = digit_mode == DigitMode_Unique ? min_digits : precision; + add_digits = add_digits < 0 ? 
0 : add_digits; + if (trim_mode == TrimMode_LeaveOneZero) { + /* if we didn't print any fractional digits, add the 0 */ + if (numFractionDigits == 0 && bufferSize > 1) { + *pCurOut = '0'; + ++pCurOut; + --bufferSize; + ++numFractionDigits; + } + } + else if (trim_mode == TrimMode_None) { + /* add trailing zeros up to add_digits length */ + if (add_digits > (npy_int32)numFractionDigits) { + char *pEnd; + /* compute the number of trailing zeros needed */ + npy_int32 numZeros = (add_digits - numFractionDigits); + + if (numZeros > (npy_int32)bufferSize - 1) { + numZeros = (npy_int32)bufferSize - 1; + } + + for (pEnd = pCurOut + numZeros; pCurOut < pEnd; ++pCurOut) { + *pCurOut = '0'; + ++numFractionDigits; + } + } + } + /* else, for trim_mode Zeros or DptZeros, there is nothing more to add */ + + /* + * when rounding, we may still end up with trailing zeros. Remove them + * depending on trim settings. + */ + if (trim_mode != TrimMode_None && numFractionDigits > 0) { + --pCurOut; + while (*pCurOut == '0') { + --pCurOut; + ++bufferSize; + --numFractionDigits; + } + if (trim_mode == TrimMode_LeaveOneZero && *pCurOut == '.') { + ++pCurOut; + *pCurOut = '0'; + --bufferSize; + ++numFractionDigits; + } + ++pCurOut; + } + + /* print the exponent into a local buffer and copy into output buffer */ + if (bufferSize > 1) { + char exponentBuffer[7]; + npy_int32 digits[5]; + npy_int32 i, exp_size, count; + + if (exp_digits > 5) { + exp_digits = 5; + } + if (exp_digits < 0) { + exp_digits = 2; + } + + exponentBuffer[0] = 'e'; + if (printExponent >= 0) { + exponentBuffer[1] = '+'; + } + else { + exponentBuffer[1] = '-'; + printExponent = -printExponent; + } + + DEBUG_ASSERT(printExponent < 100000); + + /* get exp digits */ + for (i = 0; i < 5; i++) { + digits[i] = printExponent % 10; + printExponent /= 10; + } + /* count back over leading zeros */ + for (i = 5; i > exp_digits && digits[i - 1] == 0; i--) { + } + exp_size = i; + /* write remaining digits to tmp buf */ + for (i = exp_size; i > 0; i--) { + exponentBuffer[2 + (exp_size - i)] = (char)('0' + digits[i - 1]); + } + + /* copy the exponent buffer into the output */ + count = exp_size + 2; + if (count > (npy_int32)bufferSize - 1) { + count = (npy_int32)bufferSize - 1; + } + memcpy(pCurOut, exponentBuffer, count); + pCurOut += count; + bufferSize -= count; + } + + DEBUG_ASSERT(bufferSize > 0); + pCurOut[0] = '\0'; + + return pCurOut - buffer; +} + +static npy_uint32 +Format_floatbits(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, npy_int32 exponent, + char signbit, npy_uint32 mantissaBit, npy_bool hasUnequalMargins, + Dragon4_Options *opt) +{ + /* format the value */ + if (opt->scientific) { + return FormatScientific(buffer, bufferSize, mantissa, exponent, signbit, mantissaBit, + hasUnequalMargins, opt->digit_mode, opt->precision, opt->min_digits, + opt->trim_mode, opt->digits_left, opt->exp_digits); + } + else { + return FormatPositional(buffer, bufferSize, mantissa, exponent, signbit, mantissaBit, + hasUnequalMargins, opt->digit_mode, opt->cutoff_mode, + opt->precision, opt->min_digits, opt->trim_mode, opt->digits_left, + opt->digits_right); + } +} + +static npy_uint32 +Dragon4_PrintFloat_Sleef_quad(Sleef_quad *value, Dragon4_Options *opt) +{ + char *buffer = _bigint_static.repr; + const npy_uint32 bufferSize = sizeof(_bigint_static.repr); + BigInt *bigints = _bigint_static.bigints; + + npy_uint32 floatExponent, floatSign; + npy_uint64 mantissa_hi, mantissa_lo; + npy_int32 exponent; + npy_uint32 mantissaBit; + npy_bool hasUnequalMargins; + char 
signbit = '\0';
+
+    // Extract the bits from the SLEEF quad value.
+    // IEEE binary128 layout: 1 sign bit, 15 exponent bits (bias 16383),
+    // 112 stored fraction bits; the lo/hi split assumes little-endian storage.
+    union {
+        Sleef_quad q;
+        struct {
+            npy_uint64 lo;
+            npy_uint64 hi;
+        } i;
+    } u;
+    u.q = *value;
+
+    // Extract mantissa, exponent, and sign
+    mantissa_hi = u.i.hi & bitmask_u64(48);
+    mantissa_lo = u.i.lo;
+    floatExponent = (u.i.hi >> 48) & bitmask_u32(15);
+    floatSign = u.i.hi >> 63;
+
+    /* output the sign */
+    if (floatSign != 0) {
+        signbit = '-';
+    }
+    // else if (opt->sign) {
+    //     signbit = '+';
+    // }
+
+    /* if this is a special value */
+    if (floatExponent == bitmask_u32(15)) {
+        npy_uint64 mantissa_zero = mantissa_hi == 0 && mantissa_lo == 0;
+        return PrintInfNan(buffer, bufferSize, !mantissa_zero, 16, signbit);
+    }
+    /* else this is a number */
+
+    /* factor the value into its parts */
+    if (floatExponent != 0) {
+        /*
+         * normal
+         * Decide the margins from the stored fraction bits before the
+         * implicit leading bit is OR-ed in: a zero fraction means the value
+         * sits on a power-of-two boundary and has a larger margin above
+         * than below.
+         */
+        hasUnequalMargins = (floatExponent != 1) && (mantissa_hi == 0 && mantissa_lo == 0);
+        mantissa_hi = (1ull << 48) | mantissa_hi;
+        /* mantissa_lo is unchanged */
+        exponent = floatExponent - 16383 - 112;
+        mantissaBit = 112;
+    }
+    else {
+        /* subnormal */
+        exponent = 1 - 16383 - 112;
+        mantissaBit = LogBase2_128(mantissa_hi, mantissa_lo);
+        hasUnequalMargins = NPY_FALSE;
+    }
+
+    BigInt_Set_2x_uint64(&bigints[0], mantissa_hi, mantissa_lo);
+    return Format_floatbits(buffer, bufferSize, bigints, exponent, signbit, mantissaBit,
+                            hasUnequalMargins, opt);
+}
+
+PyObject *
+Dragon4_Positional_QuadDType_opt(Sleef_quad *val, Dragon4_Options *opt)
+{
+    PyObject *ret;
+    if (Dragon4_PrintFloat_Sleef_quad(val, opt) < 0) {
+        return NULL;
+    }
+    ret = PyUnicode_FromString(_bigint_static.repr);
+    return ret;
+}
+
+PyObject *
+Dragon4_Positional_QuadDType(Sleef_quad *val, DigitMode digit_mode, CutoffMode cutoff_mode,
+                             int precision, int min_digits, int sign, TrimMode trim, int pad_left,
+                             int pad_right)
+{
+    Dragon4_Options opt;
+
+    opt.scientific = 0;
+    opt.digit_mode = digit_mode;
+    opt.cutoff_mode = cutoff_mode;
+    opt.precision = precision;
+    opt.min_digits = min_digits;
+    opt.sign = sign;
+    opt.trim_mode = trim;
+    opt.digits_left = pad_left;
+    opt.digits_right = pad_right;
+    opt.exp_digits = -1;
+
+    return Dragon4_Positional_QuadDType_opt(val, &opt);
+}
+
+PyObject *
+Dragon4_Scientific_QuadDType_opt(Sleef_quad *val, Dragon4_Options *opt)
+{
+    PyObject *ret;
+    if (Dragon4_PrintFloat_Sleef_quad(val, opt) < 0) {
+        return NULL;
+    }
+    ret = PyUnicode_FromString(_bigint_static.repr);
+    return ret;
+}
+
+PyObject *
+Dragon4_Scientific_QuadDType(Sleef_quad *val, DigitMode digit_mode, int precision, int min_digits,
+                             int sign, TrimMode trim, int pad_left, int exp_digits)
+{
+    Dragon4_Options opt;
+
+    opt.scientific = 1;
+    opt.digit_mode = digit_mode;
+    opt.cutoff_mode = CutoffMode_TotalLength;
+    opt.precision = precision;
+    opt.min_digits = min_digits;
+    opt.sign = sign;
+    opt.trim_mode = trim;
+    opt.digits_left = pad_left;
+    opt.digits_right = -1;
+    opt.exp_digits = exp_digits;
+
+    return Dragon4_Scientific_QuadDType_opt(val, &opt);
+}
+
+PyObject *
+Dragon4_Positional(PyObject *obj, DigitMode digit_mode, CutoffMode cutoff_mode, int precision,
+                   int min_digits, int sign, TrimMode trim, int pad_left, int pad_right)
+{
+    if (PyArray_IsScalar(obj, QuadPrecDType)) {
+        QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)obj;
+        if (quad_obj->backend == BACKEND_SLEEF) {
+            return Dragon4_Positional_QuadDType(&quad_obj->value.sleef_value, digit_mode,
+                                                cutoff_mode, precision, min_digits, sign, trim,
+                                                pad_left, pad_right);
+        }
+        else {
+            Sleef_quad sleef_val =
Sleef_cast_from_doubleq1(quad_obj->value.longdouble_value); + return Dragon4_Positional_QuadDType(&sleef_val, digit_mode, cutoff_mode, precision, + min_digits, sign, trim, pad_left, pad_right); + } + } + + return NULL; +} + +PyObject * +Dragon4_Scientific(PyObject *obj, DigitMode digit_mode, int precision, int min_digits, int sign, + TrimMode trim, int pad_left, int exp_digits) +{ + npy_double val; + + if (PyArray_IsScalar(obj, QuadPrecDType)) { + QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)obj; + if (quad_obj->backend == BACKEND_SLEEF) { + return Dragon4_Scientific_QuadDType(&quad_obj->value.sleef_value, digit_mode, precision, + min_digits, sign, trim, pad_left, exp_digits); + } + else { + Sleef_quad sleef_val = Sleef_cast_from_doubleq1(quad_obj->value.longdouble_value); + return Dragon4_Scientific_QuadDType(&sleef_val, digit_mode, precision, min_digits, sign, + trim, pad_left, exp_digits); + } + } + + return NULL; +} \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/dragon4.h b/quaddtype/numpy_quaddtype/src/dragon4.h new file mode 100644 index 00000000..1977595e --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/dragon4.h @@ -0,0 +1,70 @@ +#ifndef _QUADDTYPE_DRAGON4_H +#define _QUADDTYPE_DRAGON4_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "numpy/arrayobject.h" +#include +#include "quad_common.h" + +typedef enum DigitMode +{ + /* Round digits to print shortest uniquely identifiable number. */ + DigitMode_Unique, + /* Output the digits of the number as if with infinite precision */ + DigitMode_Exact, +} DigitMode; + +typedef enum CutoffMode +{ + /* up to cutoffNumber significant digits */ + CutoffMode_TotalLength, + /* up to cutoffNumber significant digits past the decimal point */ + CutoffMode_FractionLength, +} CutoffMode; + +typedef enum TrimMode +{ + TrimMode_None, /* don't trim zeros, always leave a decimal point */ + TrimMode_LeaveOneZero, /* trim all but the zero before the decimal point */ + TrimMode_Zeros, /* trim all trailing zeros, leave decimal point */ + TrimMode_DptZeros, /* trim trailing zeros & trailing decimal point */ +} TrimMode; + +typedef struct { + int scientific; + DigitMode digit_mode; + CutoffMode cutoff_mode; + int precision; + int min_digits; + int sign; + TrimMode trim_mode; + int digits_left; + int digits_right; + int exp_digits; +} Dragon4_Options; + +PyObject *Dragon4_Positional_QuadDType(Sleef_quad *val, DigitMode digit_mode, + CutoffMode cutoff_mode, int precision, int min_digits, + int sign, TrimMode trim, int pad_left, int pad_right); + +PyObject *Dragon4_Scientific_QuadDType(Sleef_quad *val, DigitMode digit_mode, + int precision, int min_digits, int sign, TrimMode trim, + int pad_left, int exp_digits); + +PyObject *Dragon4_Positional(PyObject *obj, DigitMode digit_mode, + CutoffMode cutoff_mode, int precision, int min_digits, + int sign, TrimMode trim, int pad_left, int pad_right); + +PyObject *Dragon4_Scientific(PyObject *obj, DigitMode digit_mode, int precision, + int min_digits, int sign, TrimMode trim, int pad_left, + int exp_digits); + +#ifdef __cplusplus +} +#endif + +#endif /* _QUADDTYPE_DRAGON4_H */ \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c new file mode 100644 index 00000000..7d0fad37 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/dtype.c @@ -0,0 +1,266 @@ +#include +#include +#include + +#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API +#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API +#define 
NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION +#define NPY_TARGET_VERSION NPY_2_0_API_VERSION +#define NO_IMPORT_ARRAY +#define NO_IMPORT_UFUNC +#include "numpy/arrayobject.h" +#include "numpy/ndarraytypes.h" +#include "numpy/dtype_api.h" + +#include "scalar.h" +#include "casts.h" +#include "dtype.h" +#include "dragon4.h" + +static inline int +quad_load(void *x, char *data_ptr, QuadBackendType backend) +{ + if (data_ptr == NULL || x == NULL) { + return -1; + } + if (backend == BACKEND_SLEEF) { + *(Sleef_quad *)x = *(Sleef_quad *)data_ptr; + } + else { + *(long double *)x = *(long double *)data_ptr; + } + return 0; +} + +static inline int +quad_store(char *data_ptr, void *x, QuadBackendType backend) +{ + if (data_ptr == NULL || x == NULL) { + return -1; + } + if (backend == BACKEND_SLEEF) { + *(Sleef_quad *)data_ptr = *(Sleef_quad *)x; + } + else { + *(long double *)data_ptr = *(long double *)x; + } + return 0; +} + +QuadPrecDTypeObject * +new_quaddtype_instance(QuadBackendType backend) +{ + QuadBackendType target_backend = backend; + if (backend != BACKEND_SLEEF && backend != BACKEND_LONGDOUBLE) { + PyErr_SetString(PyExc_TypeError, "Backend must be sleef or longdouble"); + return NULL; + // target_backend = BACKEND_SLEEF; + } + + QuadPrecDTypeObject *new = (QuadPrecDTypeObject *)PyArrayDescr_Type.tp_new( + (PyTypeObject *)&QuadPrecDType, NULL, NULL); + if (new == NULL) { + return NULL; + } + new->base.elsize = (target_backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); + new->base.alignment = + (target_backend == BACKEND_SLEEF) ? _Alignof(Sleef_quad) : _Alignof(long double); + new->backend = target_backend; + return new; +} + +static QuadPrecDTypeObject * +ensure_canonical(QuadPrecDTypeObject *self) +{ + Py_INCREF(self); + return self; +} + +static QuadPrecDTypeObject * +common_instance(QuadPrecDTypeObject *dtype1, QuadPrecDTypeObject *dtype2) +{ + // if backend mismatch then return SLEEF one (safe to cast ld to quad) + if (dtype1->backend != dtype2->backend) { + if (dtype1->backend == BACKEND_SLEEF) { + Py_INCREF(dtype1); + return dtype1; + } + + Py_INCREF(dtype2); + return dtype2; + } + Py_INCREF(dtype1); + return dtype1; +} + +static PyArray_DTypeMeta * +common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other) +{ + // Promote integer and floating-point types to QuadPrecDType + if (other->type_num >= 0 && + (PyTypeNum_ISINTEGER(other->type_num) || PyTypeNum_ISFLOAT(other->type_num))) { + Py_INCREF(cls); + return cls; + } + // Don't promote complex types + if (PyTypeNum_ISCOMPLEX(other->type_num)) { + Py_INCREF(Py_NotImplemented); + return (PyArray_DTypeMeta *)Py_NotImplemented; + } + + Py_INCREF(Py_NotImplemented); + return (PyArray_DTypeMeta *)Py_NotImplemented; +} + +static PyArray_Descr * +quadprec_discover_descriptor_from_pyobject(PyArray_DTypeMeta *NPY_UNUSED(cls), PyObject *obj) +{ + if (Py_TYPE(obj) != &QuadPrecision_Type) { + PyErr_SetString(PyExc_TypeError, "Can only store QuadPrecision in a QuadPrecDType array."); + return NULL; + } + + QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)obj; + + return (PyArray_Descr *)new_quaddtype_instance(quad_obj->backend); +} + +static int +quadprec_setitem(QuadPrecDTypeObject *descr, PyObject *obj, char *dataptr) +{ + QuadPrecisionObject *value; + if (PyObject_TypeCheck(obj, &QuadPrecision_Type)) { + Py_INCREF(obj); + value = (QuadPrecisionObject *)obj; + } + else { + value = QuadPrecision_from_object(obj, descr->backend); + if (value == NULL) { + return -1; + } + } + + if (quad_store(dataptr, &value->value, 
descr->backend) < 0) {
+        Py_DECREF(value);
+        char error_msg[100];
+        snprintf(error_msg, sizeof(error_msg), "Invalid memory location %p", (void *)dataptr);
+        PyErr_SetString(PyExc_ValueError, error_msg);
+        return -1;
+    }
+
+    Py_DECREF(value);
+    return 0;
+}
+
+static PyObject *
+quadprec_getitem(QuadPrecDTypeObject *descr, char *dataptr)
+{
+    QuadPrecisionObject *new = QuadPrecision_raw_new(descr->backend);
+    if (!new) {
+        return NULL;
+    }
+    if (quad_load(&new->value, dataptr, descr->backend) < 0) {
+        Py_DECREF(new);
+        char error_msg[100];
+        snprintf(error_msg, sizeof(error_msg), "Invalid memory location %p", (void *)dataptr);
+        PyErr_SetString(PyExc_ValueError, error_msg);
+        return NULL;
+    }
+    return (PyObject *)new;
+}
+
+static PyArray_Descr *
+quadprec_default_descr(PyArray_DTypeMeta *NPY_UNUSED(cls))
+{
+    return (PyArray_Descr *)new_quaddtype_instance(BACKEND_SLEEF);
+}
+
+static PyType_Slot QuadPrecDType_Slots[] = {
+        {NPY_DT_ensure_canonical, &ensure_canonical},
+        {NPY_DT_common_instance, &common_instance},
+        {NPY_DT_common_dtype, &common_dtype},
+        {NPY_DT_discover_descr_from_pyobject, &quadprec_discover_descriptor_from_pyobject},
+        {NPY_DT_setitem, &quadprec_setitem},
+        {NPY_DT_getitem, &quadprec_getitem},
+        {NPY_DT_default_descr, &quadprec_default_descr},
+        {NPY_DT_PyArray_ArrFuncs_dotfunc, NULL},
+        {0, NULL}};
+
+static PyObject *
+QuadPrecDType_new(PyTypeObject *NPY_UNUSED(cls), PyObject *args, PyObject *kwds)
+{
+    static char *kwlist[] = {"backend", NULL};
+    const char *backend_str = "sleef";
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s", kwlist, &backend_str)) {
+        return NULL;
+    }
+
+    QuadBackendType backend = BACKEND_SLEEF;
+    if (strcmp(backend_str, "longdouble") == 0) {
+        backend = BACKEND_LONGDOUBLE;
+    }
+    else if (strcmp(backend_str, "sleef") != 0) {
+        PyErr_SetString(PyExc_ValueError, "Invalid backend. Use 'sleef' or 'longdouble'.");
+        return NULL;
+    }
+
+    /* construct the descriptor directly rather than routing through a
+     * temporary QuadPrecision scalar, which would have to be checked for
+     * allocation failure and released again */
+    return (PyObject *)new_quaddtype_instance(backend);
+}
+
+static PyObject *
+QuadPrecDType_repr(QuadPrecDTypeObject *self)
+{
+    const char *backend_str = (self->backend == BACKEND_SLEEF) ? "sleef" : "longdouble";
+    return PyUnicode_FromFormat("QuadPrecDType(backend='%s')", backend_str);
+}
+
+static PyObject *
+QuadPrecDType_str(QuadPrecDTypeObject *self)
+{
+    const char *backend_str = (self->backend == BACKEND_SLEEF) ?
"sleef" : "longdouble"; + return PyUnicode_FromFormat("QuadPrecDType(backend='%s')", backend_str); +} + +PyArray_DTypeMeta QuadPrecDType = { + {{ + PyVarObject_HEAD_INIT(NULL, 0).tp_name = "numpy_quaddtype.QuadPrecDType", + .tp_basicsize = sizeof(QuadPrecDTypeObject), + .tp_new = QuadPrecDType_new, + .tp_repr = (reprfunc)QuadPrecDType_repr, + .tp_str = (reprfunc)QuadPrecDType_str, + }}, +}; + +int +init_quadprec_dtype(void) +{ + PyArrayMethod_Spec **casts = init_casts(); + if (!casts) + return -1; + + PyArrayDTypeMeta_Spec QuadPrecDType_DTypeSpec = { + .flags = NPY_DT_PARAMETRIC | NPY_DT_NUMERIC, + .casts = casts, + .typeobj = &QuadPrecision_Type, + .slots = QuadPrecDType_Slots, + }; + + ((PyObject *)&QuadPrecDType)->ob_type = &PyArrayDTypeMeta_Type; + + ((PyTypeObject *)&QuadPrecDType)->tp_base = &PyArrayDescr_Type; + + if (PyType_Ready((PyTypeObject *)&QuadPrecDType) < 0) { + return -1; + } + + if (PyArrayInitDTypeMeta_FromSpec(&QuadPrecDType, &QuadPrecDType_DTypeSpec) < 0) { + return -1; + } + + free_casts(); + + return 0; +} \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/dtype.h b/quaddtype/numpy_quaddtype/src/dtype.h new file mode 100644 index 00000000..77a65449 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/dtype.h @@ -0,0 +1,30 @@ +#ifndef _QUADDTYPE_DTYPE_H +#define _QUADDTYPE_DTYPE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include "quad_common.h" + +typedef struct { + PyArray_Descr base; + QuadBackendType backend; +} QuadPrecDTypeObject; + +extern PyArray_DTypeMeta QuadPrecDType; + +QuadPrecDTypeObject * +new_quaddtype_instance(QuadBackendType backend); + +int +init_quadprec_dtype(void); + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/ops.hpp b/quaddtype/numpy_quaddtype/src/ops.hpp new file mode 100644 index 00000000..b32ae386 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/ops.hpp @@ -0,0 +1,453 @@ +#include +#include +#include + +// Unary Quad Operations +typedef Sleef_quad (*unary_op_quad_def)(Sleef_quad *); + +static Sleef_quad +quad_negative(Sleef_quad *op) +{ + return Sleef_negq1(*op); +} + +static Sleef_quad +quad_positive(Sleef_quad *op) +{ + return *op; +} + +static inline Sleef_quad +quad_absolute(Sleef_quad *op) +{ + return Sleef_fabsq1(*op); +} + +static inline Sleef_quad +quad_rint(Sleef_quad *op) +{ + return Sleef_rintq1(*op); +} + +static inline Sleef_quad +quad_trunc(Sleef_quad *op) +{ + return Sleef_truncq1(*op); +} + +static inline Sleef_quad +quad_floor(Sleef_quad *op) +{ + return Sleef_floorq1(*op); +} + +static inline Sleef_quad +quad_ceil(Sleef_quad *op) +{ + return Sleef_ceilq1(*op); +} + +static inline Sleef_quad +quad_sqrt(Sleef_quad *op) +{ + return Sleef_sqrtq1_u05(*op); +} + +static inline Sleef_quad +quad_square(Sleef_quad *op) +{ + return Sleef_mulq1_u05(*op, *op); +} + +static inline Sleef_quad +quad_log(Sleef_quad *op) +{ + return Sleef_logq1_u10(*op); +} + +static inline Sleef_quad +quad_log2(Sleef_quad *op) +{ + return Sleef_log2q1_u10(*op); +} + +static inline Sleef_quad +quad_log10(Sleef_quad *op) +{ + return Sleef_log10q1_u10(*op); +} + +static inline Sleef_quad +quad_log1p(Sleef_quad *op) +{ + return Sleef_log1pq1_u10(*op); +} + +static inline Sleef_quad +quad_exp(Sleef_quad *op) +{ + return Sleef_expq1_u10(*op); +} + +static inline Sleef_quad +quad_exp2(Sleef_quad *op) +{ + return Sleef_exp2q1_u10(*op); +} + +static inline Sleef_quad +quad_sin(Sleef_quad *op) +{ + return Sleef_sinq1_u10(*op); +} + +static inline 
Sleef_quad +quad_cos(Sleef_quad *op) +{ + return Sleef_cosq1_u10(*op); +} + +static inline Sleef_quad +quad_tan(Sleef_quad *op) +{ + return Sleef_tanq1_u10(*op); +} + +static inline Sleef_quad +quad_asin(Sleef_quad *op) +{ + return Sleef_asinq1_u10(*op); +} + +static inline Sleef_quad +quad_acos(Sleef_quad *op) +{ + return Sleef_acosq1_u10(*op); +} + +static inline Sleef_quad +quad_atan(Sleef_quad *op) +{ + return Sleef_atanq1_u10(*op); +} + +// Unary long double operations +typedef long double (*unary_op_longdouble_def)(long double *); + +static inline long double +ld_negative(long double *op) +{ + return -(*op); +} + +static inline long double +ld_positive(long double *op) +{ + return *op; +} + +static inline long double +ld_absolute(long double *op) +{ + return fabsl(*op); +} + +static inline long double +ld_rint(long double *op) +{ + return rintl(*op); +} + +static inline long double +ld_trunc(long double *op) +{ + return truncl(*op); +} + +static inline long double +ld_floor(long double *op) +{ + return floorl(*op); +} + +static inline long double +ld_ceil(long double *op) +{ + return ceill(*op); +} + +static inline long double +ld_sqrt(long double *op) +{ + return sqrtl(*op); +} + +static inline long double +ld_square(long double *op) +{ + return (*op) * (*op); +} + +static inline long double +ld_log(long double *op) +{ + return logl(*op); +} + +static inline long double +ld_log2(long double *op) +{ + return log2l(*op); +} + +static inline long double +ld_log10(long double *op) +{ + return log10l(*op); +} + +static inline long double +ld_log1p(long double *op) +{ + return log1pl(*op); +} + +static inline long double +ld_exp(long double *op) +{ + return expl(*op); +} + +static inline long double +ld_exp2(long double *op) +{ + return exp2l(*op); +} + +static inline long double +ld_sin(long double *op) +{ + return sinl(*op); +} + +static inline long double +ld_cos(long double *op) +{ + return cosl(*op); +} + +static inline long double +ld_tan(long double *op) +{ + return tanl(*op); +} + +static inline long double +ld_asin(long double *op) +{ + return asinl(*op); +} + +static inline long double +ld_acos(long double *op) +{ + return acosl(*op); +} + +static inline long double +ld_atan(long double *op) +{ + return atanl(*op); +} + +// Binary Quad operations +typedef Sleef_quad (*binary_op_quad_def)(Sleef_quad *, Sleef_quad *); + +static inline Sleef_quad +quad_add(Sleef_quad *in1, Sleef_quad *in2) +{ + return Sleef_addq1_u05(*in1, *in2); +} + +static inline Sleef_quad +quad_sub(Sleef_quad *in1, Sleef_quad *in2) +{ + return Sleef_subq1_u05(*in1, *in2); +} + +static inline Sleef_quad +quad_mul(Sleef_quad *a, Sleef_quad *b) +{ + return Sleef_mulq1_u05(*a, *b); +} + +static inline Sleef_quad +quad_div(Sleef_quad *a, Sleef_quad *b) +{ + return Sleef_divq1_u05(*a, *b); +} + +static inline Sleef_quad +quad_pow(Sleef_quad *a, Sleef_quad *b) +{ + return Sleef_powq1_u10(*a, *b); +} + +static inline Sleef_quad +quad_mod(Sleef_quad *a, Sleef_quad *b) +{ + return Sleef_fmodq1(*a, *b); +} + +static inline Sleef_quad +quad_minimum(Sleef_quad *in1, Sleef_quad *in2) +{ + return Sleef_icmpleq1(*in1, *in2) ? *in1 : *in2; +} + +static inline Sleef_quad +quad_maximum(Sleef_quad *in1, Sleef_quad *in2) +{ + return Sleef_icmpgeq1(*in1, *in2) ? 
*in1 : *in2; +} + +static inline Sleef_quad +quad_atan2(Sleef_quad *in1, Sleef_quad *in2) +{ + return Sleef_atan2q1_u10(*in1, *in2); +} + +// Binary long double operations +typedef long double (*binary_op_longdouble_def)(long double *, long double *); + +static inline long double +ld_add(long double *in1, long double *in2) +{ + return (*in1) + (*in2); +} + +static inline long double +ld_sub(long double *in1, long double *in2) +{ + return (*in1) - (*in2); +} + +static inline long double +ld_mul(long double *a, long double *b) +{ + return (*a) * (*b); +} + +static inline long double +ld_div(long double *a, long double *b) +{ + return (*a) / (*b); +} + +static inline long double +ld_pow(long double *a, long double *b) +{ + return powl(*a, *b); +} + +static inline long double +ld_mod(long double *a, long double *b) +{ + return fmodl(*a, *b); +} + +static inline long double +ld_minimum(long double *in1, long double *in2) +{ + return (*in1 < *in2) ? *in1 : *in2; +} + +static inline long double +ld_maximum(long double *in1, long double *in2) +{ + return (*in1 > *in2) ? *in1 : *in2; +} + +static inline long double +ld_atan2(long double *in1, long double *in2) +{ + return atan2l(*in1, *in2); +} + +// comparison quad functions +typedef npy_bool (*cmp_quad_def)(const Sleef_quad *, const Sleef_quad *); + +static inline npy_bool +quad_equal(const Sleef_quad *a, const Sleef_quad *b) +{ + return Sleef_icmpeqq1(*a, *b); +} + +static inline npy_bool +quad_notequal(const Sleef_quad *a, const Sleef_quad *b) +{ + return Sleef_icmpneq1(*a, *b); +} + +static inline npy_bool +quad_less(const Sleef_quad *a, const Sleef_quad *b) +{ + return Sleef_icmpltq1(*a, *b); +} + +static inline npy_bool +quad_lessequal(const Sleef_quad *a, const Sleef_quad *b) +{ + return Sleef_icmpleq1(*a, *b); +} + +static inline npy_bool +quad_greater(const Sleef_quad *a, const Sleef_quad *b) +{ + return Sleef_icmpgtq1(*a, *b); +} + +static inline npy_bool +quad_greaterequal(const Sleef_quad *a, const Sleef_quad *b) +{ + return Sleef_icmpgeq1(*a, *b); +} + +// comparison quad functions +typedef npy_bool (*cmp_londouble_def)(const long double *, const long double *); + +static inline npy_bool +ld_equal(const long double *a, const long double *b) +{ + return *a == *b; +} + +static inline npy_bool +ld_notequal(const long double *a, const long double *b) +{ + return *a != *b; +} + +static inline npy_bool +ld_less(const long double *a, const long double *b) +{ + return *a < *b; +} + +static inline npy_bool +ld_lessequal(const long double *a, const long double *b) +{ + return *a <= *b; +} + +static inline npy_bool +ld_greater(const long double *a, const long double *b) +{ + return *a > *b; +} + +static inline npy_bool +ld_greaterequal(const long double *a, const long double *b) +{ + return *a >= *b; +} \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/quad_common.h b/quaddtype/numpy_quaddtype/src/quad_common.h new file mode 100644 index 00000000..bc578a29 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/quad_common.h @@ -0,0 +1,18 @@ +#ifndef _QUADDTYPE_COMMON_H +#define _QUADDTYPE_COMMON_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + BACKEND_INVALID = -1, + BACKEND_SLEEF, + BACKEND_LONGDOUBLE +} QuadBackendType; + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/quaddtype_main.c b/quaddtype/numpy_quaddtype/src/quaddtype_main.c new file mode 100644 index 00000000..f2935299 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/quaddtype_main.c @@ 
-0,0 +1,116 @@ +#include +#include +#include +#include + +#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API +#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API +#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION +#define NPY_TARGET_VERSION NPY_2_0_API_VERSION + +#include "numpy/arrayobject.h" +#include "numpy/dtype_api.h" +#include "numpy/ufuncobject.h" + +#include "scalar.h" +#include "dtype.h" +#include "umath.h" +#include "quad_common.h" +#include "float.h" + + +static PyObject* py_is_longdouble_128(PyObject* self, PyObject* args) { + if(sizeof(long double) == 16 && + LDBL_MANT_DIG == 113 && + LDBL_MAX_EXP == 16384) { + Py_RETURN_TRUE; + } else { + Py_RETURN_FALSE; + } +} + +static PyObject* get_sleef_constant(PyObject* self, PyObject* args) { + const char* constant_name; + if (!PyArg_ParseTuple(args, "s", &constant_name)) { + return NULL; + } + + QuadPrecisionObject* result = QuadPrecision_raw_new(BACKEND_SLEEF); + if (result == NULL) { + return NULL; + } + + if (strcmp(constant_name, "pi") == 0) { + result->value.sleef_value = SLEEF_M_PIq; + } else if (strcmp(constant_name, "e") == 0) { + result->value.sleef_value = SLEEF_M_Eq; + } else if (strcmp(constant_name, "log2e") == 0) { + result->value.sleef_value = SLEEF_M_LOG2Eq; + } else if (strcmp(constant_name, "log10e") == 0) { + result->value.sleef_value = SLEEF_M_LOG10Eq; + } else if (strcmp(constant_name, "ln2") == 0) { + result->value.sleef_value = SLEEF_M_LN2q; + } else if (strcmp(constant_name, "ln10") == 0) { + result->value.sleef_value = SLEEF_M_LN10q; + } else if (strcmp(constant_name, "quad_max") == 0) { + result->value.sleef_value = SLEEF_QUAD_MAX; + } else if (strcmp(constant_name, "quad_min") == 0) { + result->value.sleef_value = SLEEF_QUAD_MIN; + } else if (strcmp(constant_name, "epsilon") == 0) { + result->value.sleef_value = SLEEF_QUAD_EPSILON; + } + else { + PyErr_SetString(PyExc_ValueError, "Unknown constant name"); + Py_DECREF(result); + return NULL; + } + + return (PyObject*)result; +} + +static PyMethodDef module_methods[] = { + {"is_longdouble_128", py_is_longdouble_128, METH_NOARGS, "Check if long double is 128-bit"}, + {"get_sleef_constant", get_sleef_constant, METH_VARARGS, "Get Sleef constant by name"}, + {NULL, NULL, 0, NULL} +}; + +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + .m_name = "_quaddtype_main", + .m_doc = "Quad (128-bit) floating point Data Type for NumPy with multiple backends", + .m_size = -1, + .m_methods = module_methods +}; + +PyMODINIT_FUNC +PyInit__quaddtype_main(void) +{ + import_array(); + import_umath(); + PyObject *m = PyModule_Create(&moduledef); + if (!m) { + return NULL; + } + + if (init_quadprecision_scalar() < 0) + goto error; + + if (PyModule_AddObject(m, "QuadPrecision", (PyObject *)&QuadPrecision_Type) < 0) + goto error; + + if (init_quadprec_dtype() < 0) + goto error; + + if (PyModule_AddObject(m, "QuadPrecDType", (PyObject *)&QuadPrecDType) < 0) + goto error; + + if (init_quad_umath() < 0) { + goto error; + } + + return m; + +error: + Py_XDECREF(m); + return NULL; +} \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c new file mode 100644 index 00000000..9bec08a6 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/scalar.c @@ -0,0 +1,254 @@ +#include +#include +#include +#include + +#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API +#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION +#define NO_IMPORT_ARRAY + +#include "numpy/arrayobject.h" +#include "numpy/ndarraytypes.h" +#include 
"numpy/dtype_api.h" + +#include "scalar.h" +#include "scalar_ops.h" +#include "dragon4.h" + +QuadPrecisionObject * +QuadPrecision_raw_new(QuadBackendType backend) +{ + QuadPrecisionObject *new = PyObject_New(QuadPrecisionObject, &QuadPrecision_Type); + if (!new) + return NULL; + new->backend = backend; + if (backend == BACKEND_SLEEF) { + new->value.sleef_value = Sleef_cast_from_doubleq1(0.0); + } + else { + new->value.longdouble_value = 0.0L; + } + return new; +} + +QuadPrecisionObject * +QuadPrecision_from_object(PyObject *value, QuadBackendType backend) +{ + QuadPrecisionObject *self = QuadPrecision_raw_new(backend); + if (!self) + return NULL; + + if (PyFloat_Check(value)) { + double dval = PyFloat_AsDouble(value); + if (backend == BACKEND_SLEEF) { + self->value.sleef_value = Sleef_cast_from_doubleq1(dval); + } + else { + self->value.longdouble_value = (long double)dval; + } + } + else if (PyUnicode_CheckExact(value)) { + const char *s = PyUnicode_AsUTF8(value); + char *endptr = NULL; + if (backend == BACKEND_SLEEF) { + self->value.sleef_value = Sleef_strtoq(s, &endptr); + } + else { + self->value.longdouble_value = strtold(s, &endptr); + } + if (*endptr != '\0' || endptr == s) { + PyErr_SetString(PyExc_ValueError, "Unable to parse string to QuadPrecision"); + Py_DECREF(self); + return NULL; + } + } + else if (PyLong_Check(value)) { + long long val = PyLong_AsLongLong(value); + if (val == -1 && PyErr_Occurred()) { + PyErr_SetString(PyExc_OverflowError, "Overflow Error, value out of range"); + Py_DECREF(self); + return NULL; + } + if (backend == BACKEND_SLEEF) { + self->value.sleef_value = Sleef_cast_from_int64q1(val); + } + else { + self->value.longdouble_value = (long double)val; + } + } + else if (Py_TYPE(value) == &QuadPrecision_Type) { + Py_DECREF(self); // discard the default one + QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)value; + // create a new one with the same backend + QuadPrecisionObject *self = QuadPrecision_raw_new(quad_obj->backend); + if (quad_obj->backend == BACKEND_SLEEF) { + self->value.sleef_value = quad_obj->value.sleef_value; + } + else { + self->value.longdouble_value = quad_obj->value.longdouble_value; + } + + return self; + } + else { + PyObject *type_str = PyObject_Str((PyObject *)Py_TYPE(value)); + if (type_str != NULL) { + const char *type_cstr = PyUnicode_AsUTF8(type_str); + if (type_cstr != NULL) { + PyErr_Format(PyExc_TypeError, + "QuadPrecision value must be a quad, float, int or string, but got %s " + "instead", + type_cstr); + } + else { + PyErr_SetString( + PyExc_TypeError, + "QuadPrecision value must be a quad, float, int or string, but got an " + "unknown type instead"); + } + Py_DECREF(type_str); + } + else { + PyErr_SetString(PyExc_TypeError, + "QuadPrecision value must be a quad, float, int or string, but got an " + "unknown type instead"); + } + Py_DECREF(self); + return NULL; + } + + return self; +} + +static PyObject * +QuadPrecision_new(PyTypeObject *cls, PyObject *args, PyObject *kwargs) +{ + PyObject *value; + const char *backend_str = "sleef"; + static char *kwlist[] = {"value", "backend", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s", kwlist, &value, &backend_str)) { + return NULL; + } + + QuadBackendType backend = BACKEND_SLEEF; + if (strcmp(backend_str, "longdouble") == 0) { + backend = BACKEND_LONGDOUBLE; + } + else if (strcmp(backend_str, "sleef") != 0) { + PyErr_SetString(PyExc_ValueError, "Invalid backend. 
Use 'sleef' or 'longdouble'."); + return NULL; + } + + return (PyObject *)QuadPrecision_from_object(value, backend); +} + +static PyObject * +QuadPrecision_str_dragon4(QuadPrecisionObject *self) +{ + Dragon4_Options opt = {.scientific = 0, + .digit_mode = DigitMode_Unique, + .cutoff_mode = CutoffMode_TotalLength, + .precision = SLEEF_QUAD_DIG, + .sign = 1, + .trim_mode = TrimMode_LeaveOneZero, + .digits_left = 1, + .digits_right = SLEEF_QUAD_DIG}; + + if (self->backend == BACKEND_SLEEF) { + return Dragon4_Positional_QuadDType( + &self->value.sleef_value, opt.digit_mode, opt.cutoff_mode, opt.precision, + opt.min_digits, opt.sign, opt.trim_mode, opt.digits_left, opt.digits_right); + } + else { + Sleef_quad sleef_val = Sleef_cast_from_doubleq1(self->value.longdouble_value); + return Dragon4_Positional_QuadDType(&sleef_val, opt.digit_mode, opt.cutoff_mode, + opt.precision, opt.min_digits, opt.sign, opt.trim_mode, + opt.digits_left, opt.digits_right); + } +} + +static PyObject * +QuadPrecision_str(QuadPrecisionObject *self) +{ + char buffer[128]; + if (self->backend == BACKEND_SLEEF) { + Sleef_snprintf(buffer, sizeof(buffer), "%.*Qe", SLEEF_QUAD_DIG, self->value.sleef_value); + } + else { + snprintf(buffer, sizeof(buffer), "%.35Le", self->value.longdouble_value); + } + return PyUnicode_FromString(buffer); +} + +static PyObject * +QuadPrecision_repr(QuadPrecisionObject *self) +{ + PyObject *str = QuadPrecision_str(self); + if (str == NULL) { + return NULL; + } + const char *backend_str = (self->backend == BACKEND_SLEEF) ? "sleef" : "longdouble"; + PyObject *res = PyUnicode_FromFormat("QuadPrecision('%S', backend='%s')", str, backend_str); + Py_DECREF(str); + return res; +} + +static PyObject * +QuadPrecision_repr_dragon4(QuadPrecisionObject *self) +{ + Dragon4_Options opt = {.scientific = 1, + .digit_mode = DigitMode_Unique, + .cutoff_mode = CutoffMode_TotalLength, + .precision = SLEEF_QUAD_DIG, + .sign = 1, + .trim_mode = TrimMode_LeaveOneZero, + .digits_left = 1, + .exp_digits = 3}; + + PyObject *str; + if (self->backend == BACKEND_SLEEF) { + str = Dragon4_Scientific_QuadDType(&self->value.sleef_value, opt.digit_mode, opt.precision, + opt.min_digits, opt.sign, opt.trim_mode, opt.digits_left, + opt.exp_digits); + } + else { + Sleef_quad sleef_val = Sleef_cast_from_doubleq1(self->value.longdouble_value); + str = Dragon4_Scientific_QuadDType(&sleef_val, opt.digit_mode, opt.precision, + opt.min_digits, opt.sign, opt.trim_mode, opt.digits_left, + opt.exp_digits); + } + + if (str == NULL) { + return NULL; + } + + const char *backend_str = (self->backend == BACKEND_SLEEF) ? 
"sleef" : "longdouble"; + PyObject *res = PyUnicode_FromFormat("QuadPrecision('%S', backend='%s')", str, backend_str); + Py_DECREF(str); + return res; +} + +static void +QuadPrecision_dealloc(QuadPrecisionObject *self) +{ + Py_TYPE(self)->tp_free((PyObject *)self); +} + +PyTypeObject QuadPrecision_Type = { + PyVarObject_HEAD_INIT(NULL, 0).tp_name = "numpy_quaddtype.QuadPrecision", + .tp_basicsize = sizeof(QuadPrecisionObject), + .tp_itemsize = 0, + .tp_new = QuadPrecision_new, + .tp_dealloc = (destructor)QuadPrecision_dealloc, + .tp_repr = (reprfunc)QuadPrecision_repr_dragon4, + .tp_str = (reprfunc)QuadPrecision_str_dragon4, + .tp_as_number = &quad_as_scalar, + .tp_richcompare = (richcmpfunc)quad_richcompare, +}; + +int +init_quadprecision_scalar(void) +{ + return PyType_Ready(&QuadPrecision_Type); +} \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/scalar.h b/quaddtype/numpy_quaddtype/src/scalar.h new file mode 100644 index 00000000..4fac1adf --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/scalar.h @@ -0,0 +1,41 @@ +#ifndef _QUADDTYPE_SCALAR_H +#define _QUADDTYPE_SCALAR_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include "quad_common.h" + +typedef union { + Sleef_quad sleef_value; + long double longdouble_value; +} quad_value; + +typedef struct { + PyObject_HEAD + quad_value value; + QuadBackendType backend; +} QuadPrecisionObject; + +extern PyTypeObject QuadPrecision_Type; + +QuadPrecisionObject * +QuadPrecision_raw_new(QuadBackendType backend); + +QuadPrecisionObject * +QuadPrecision_from_object(PyObject *value, QuadBackendType backend); + +int +init_quadprecision_scalar(void); + +#define PyArray_IsScalar(obj, QuadPrecDType) PyObject_TypeCheck(obj, &QuadPrecision_Type) +#define PyArrayScalar_VAL(obj, QuadPrecDType) (((QuadPrecisionObject *)obj)->value) + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/scalar_ops.cpp b/quaddtype/numpy_quaddtype/src/scalar_ops.cpp new file mode 100644 index 00000000..5888ad60 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/scalar_ops.cpp @@ -0,0 +1,243 @@ +#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API +#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION +#define NPY_TARGET_VERSION NPY_2_0_API_VERSION +#define NO_IMPORT_ARRAY + +extern "C" { +#include + +#include "numpy/arrayobject.h" +#include "numpy/ndarraytypes.h" +#include "numpy/ufuncobject.h" + +#include "numpy/dtype_api.h" +} + +#include "scalar.h" +#include "ops.hpp" +#include "scalar_ops.h" +#include "quad_common.h" + +template +static PyObject * +quad_unary_func(QuadPrecisionObject *self) +{ + QuadPrecisionObject *res = QuadPrecision_raw_new(self->backend); + if (!res) { + return NULL; + } + + if (self->backend == BACKEND_SLEEF) { + res->value.sleef_value = sleef_op(&self->value.sleef_value); + } + else { + res->value.longdouble_value = longdouble_op(&self->value.longdouble_value); + } + return (PyObject *)res; +} + +PyObject * +quad_nonzero(QuadPrecisionObject *self) +{ + if (self->backend == BACKEND_SLEEF) { + return PyBool_FromLong(Sleef_icmpneq1(self->value.sleef_value, Sleef_cast_from_int64q1(0))); + } + else { + return PyBool_FromLong(self->value.longdouble_value != 0.0L); + } +} + +template +static PyObject * +quad_binary_func(PyObject *op1, PyObject *op2) +{ + QuadPrecisionObject *self; + PyObject *other; + QuadPrecisionObject *other_quad = NULL; + int is_forward; + QuadBackendType backend; + + if (PyObject_TypeCheck(op1, &QuadPrecision_Type)) { + is_forward = 1; + 
self = (QuadPrecisionObject *)op1; + other = Py_NewRef(op2); + backend = self->backend; + } + else { + is_forward = 0; + self = (QuadPrecisionObject *)op2; + other = Py_NewRef(op1); + backend = self->backend; + } + + if (PyObject_TypeCheck(other, &QuadPrecision_Type)) { + Py_INCREF(other); + other_quad = (QuadPrecisionObject *)other; + if (other_quad->backend != backend) { + PyErr_SetString(PyExc_TypeError, "Cannot mix QuadPrecision backends"); + Py_DECREF(other); + return NULL; + } + } + else if (PyLong_Check(other) || PyFloat_Check(other)) { + other_quad = QuadPrecision_from_object(other, backend); + if (!other_quad) { + Py_DECREF(other); + return NULL; + } + } + else { + Py_DECREF(other); + Py_RETURN_NOTIMPLEMENTED; + } + + QuadPrecisionObject *res = QuadPrecision_raw_new(backend); + if (!res) { + Py_DECREF(other_quad); + Py_DECREF(other); + return NULL; + } + + if (backend == BACKEND_SLEEF) { + if (is_forward) { + res->value.sleef_value = + sleef_op(&self->value.sleef_value, &other_quad->value.sleef_value); + } + else { + res->value.sleef_value = + sleef_op(&other_quad->value.sleef_value, &self->value.sleef_value); + } + } + else { + if (is_forward) { + res->value.longdouble_value = longdouble_op(&self->value.longdouble_value, + &other_quad->value.longdouble_value); + } + else { + res->value.longdouble_value = longdouble_op(&other_quad->value.longdouble_value, + &self->value.longdouble_value); + } + } + + Py_DECREF(other_quad); + Py_DECREF(other); + return (PyObject *)res; +} + +PyObject * +quad_richcompare(QuadPrecisionObject *self, PyObject *other, int cmp_op) +{ + QuadPrecisionObject *other_quad = NULL; + QuadBackendType backend = self->backend; + + if (PyObject_TypeCheck(other, &QuadPrecision_Type)) { + Py_INCREF(other); + other_quad = (QuadPrecisionObject *)other; + if (other_quad->backend != backend) { + PyErr_SetString(PyExc_TypeError, + "Cannot compare QuadPrecision objects with different backends"); + Py_DECREF(other_quad); + return NULL; + } + } + else if (PyLong_CheckExact(other) || PyFloat_CheckExact(other)) { + other_quad = QuadPrecision_from_object(other, backend); + if (other_quad == NULL) { + return NULL; + } + } + else { + Py_RETURN_NOTIMPLEMENTED; + } + + int cmp; + if (backend == BACKEND_SLEEF) { + switch (cmp_op) { + case Py_LT: + cmp = Sleef_icmpltq1(self->value.sleef_value, other_quad->value.sleef_value); + break; + case Py_LE: + cmp = Sleef_icmpleq1(self->value.sleef_value, other_quad->value.sleef_value); + break; + case Py_EQ: + cmp = Sleef_icmpeqq1(self->value.sleef_value, other_quad->value.sleef_value); + break; + case Py_NE: + cmp = Sleef_icmpneq1(self->value.sleef_value, other_quad->value.sleef_value); + break; + case Py_GT: + cmp = Sleef_icmpgtq1(self->value.sleef_value, other_quad->value.sleef_value); + break; + case Py_GE: + cmp = Sleef_icmpgeq1(self->value.sleef_value, other_quad->value.sleef_value); + break; + default: + Py_DECREF(other_quad); + Py_RETURN_NOTIMPLEMENTED; + } + } + else { + switch (cmp_op) { + case Py_LT: + cmp = self->value.longdouble_value < other_quad->value.longdouble_value; + break; + case Py_LE: + cmp = self->value.longdouble_value <= other_quad->value.longdouble_value; + break; + case Py_EQ: + cmp = self->value.longdouble_value == other_quad->value.longdouble_value; + break; + case Py_NE: + cmp = self->value.longdouble_value != other_quad->value.longdouble_value; + break; + case Py_GT: + cmp = self->value.longdouble_value > other_quad->value.longdouble_value; + break; + case Py_GE: + cmp = self->value.longdouble_value >= 
other_quad->value.longdouble_value; + break; + default: + Py_DECREF(other_quad); + Py_RETURN_NOTIMPLEMENTED; + } + } + Py_DECREF(other_quad); + + return PyBool_FromLong(cmp); +} + +static PyObject * +QuadPrecision_float(QuadPrecisionObject *self) +{ + if (self->backend == BACKEND_SLEEF) { + return PyFloat_FromDouble(Sleef_cast_to_doubleq1(self->value.sleef_value)); + } + else { + return PyFloat_FromDouble((double)self->value.longdouble_value); + } +} + +static PyObject * +QuadPrecision_int(QuadPrecisionObject *self) +{ + if (self->backend == BACKEND_SLEEF) { + return PyLong_FromLongLong(Sleef_cast_to_int64q1(self->value.sleef_value)); + } + else { + return PyLong_FromLongLong((long long)self->value.longdouble_value); + } +} + +PyNumberMethods quad_as_scalar = { + .nb_add = (binaryfunc)quad_binary_func, + .nb_subtract = (binaryfunc)quad_binary_func, + .nb_multiply = (binaryfunc)quad_binary_func, + .nb_power = (ternaryfunc)quad_binary_func, + .nb_negative = (unaryfunc)quad_unary_func, + .nb_positive = (unaryfunc)quad_unary_func, + .nb_absolute = (unaryfunc)quad_unary_func, + .nb_bool = (inquiry)quad_nonzero, + .nb_int = (unaryfunc)QuadPrecision_int, + .nb_float = (unaryfunc)QuadPrecision_float, + .nb_true_divide = (binaryfunc)quad_binary_func, +}; \ No newline at end of file diff --git a/quaddtype/quaddtype/src/scalar_ops.h b/quaddtype/numpy_quaddtype/src/scalar_ops.h similarity index 100% rename from quaddtype/quaddtype/src/scalar_ops.h rename to quaddtype/numpy_quaddtype/src/scalar_ops.h diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp new file mode 100644 index 00000000..0058236a --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/umath.cpp @@ -0,0 +1,806 @@ +#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API +#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API +#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION +#define NPY_TARGET_VERSION NPY_2_0_API_VERSION +#define NO_IMPORT_ARRAY +#define NO_IMPORT_UFUNC + +extern "C" { +#include +#include + +#include "numpy/arrayobject.h" +#include "numpy/ndarraytypes.h" +#include "numpy/ufuncobject.h" + +#include "numpy/dtype_api.h" +} +#include "quad_common.h" +#include "scalar.h" +#include "dtype.h" +#include "umath.h" +#include "ops.hpp" + +// helper debugging function +static const char * +get_dtype_name(PyArray_DTypeMeta *dtype) +{ + if (dtype == &QuadPrecDType) { + return "QuadPrecDType"; + } + else if (dtype == &PyArray_BoolDType) { + return "BoolDType"; + } + else if (dtype == &PyArray_ByteDType) { + return "ByteDType"; + } + else if (dtype == &PyArray_UByteDType) { + return "UByteDType"; + } + else if (dtype == &PyArray_ShortDType) { + return "ShortDType"; + } + else if (dtype == &PyArray_UShortDType) { + return "UShortDType"; + } + else if (dtype == &PyArray_IntDType) { + return "IntDType"; + } + else if (dtype == &PyArray_UIntDType) { + return "UIntDType"; + } + else if (dtype == &PyArray_LongDType) { + return "LongDType"; + } + else if (dtype == &PyArray_ULongDType) { + return "ULongDType"; + } + else if (dtype == &PyArray_LongLongDType) { + return "LongLongDType"; + } + else if (dtype == &PyArray_ULongLongDType) { + return "ULongLongDType"; + } + else if (dtype == &PyArray_FloatDType) { + return "FloatDType"; + } + else if (dtype == &PyArray_DoubleDType) { + return "DoubleDType"; + } + else if (dtype == &PyArray_LongDoubleDType) { + return "LongDoubleDType"; + } + else { + return "UnknownDType"; + } +} + +static NPY_CASTING +quad_unary_op_resolve_descriptors(PyObject *self, 
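+                                  /* resolver: propagates the operand
+                                     descriptor to the output and reports
+                                     NPY_UNSAFE_CASTING when the input and
+                                     output backends differ, so NumPy can
+                                     refuse the loop under stricter casting
+                                     rules */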
PyArray_DTypeMeta *const dtypes[], + PyArray_Descr *const given_descrs[], PyArray_Descr *loop_descrs[], + npy_intp *NPY_UNUSED(view_offset)) +{ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + if (given_descrs[1] == NULL) { + Py_INCREF(given_descrs[0]); + loop_descrs[1] = given_descrs[0]; + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)given_descrs[0]; + QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)loop_descrs[1]; + + if (descr_in->backend != descr_out->backend) { + return NPY_UNSAFE_CASTING; + } + + return NPY_NO_CASTING; +} + +template +int +quad_generic_unary_op_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); + + quad_value in, out; + while (N--) { + memcpy(&in, in_ptr, elem_size); + if (backend == BACKEND_SLEEF) { + out.sleef_value = sleef_op(&in.sleef_value); + } + else { + out.longdouble_value = longdouble_op(&in.longdouble_value); + } + memcpy(out_ptr, &out, elem_size); + + in_ptr += in_stride; + out_ptr += out_stride; + } + return 0; +} + +template +int +quad_generic_unary_op_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + + while (N--) { + if (backend == BACKEND_SLEEF) { + *(Sleef_quad *)out_ptr = sleef_op((Sleef_quad *)in_ptr); + } + else { + *(long double *)out_ptr = longdouble_op((long double *)in_ptr); + } + in_ptr += in_stride; + out_ptr += out_stride; + } + return 0; +} + +template +int +create_quad_unary_ufunc(PyObject *numpy, const char *ufunc_name) +{ + PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name); + if (ufunc == NULL) { + return -1; + } + + PyArray_DTypeMeta *dtypes[2] = {&QuadPrecDType, &QuadPrecDType}; + + PyType_Slot slots[] = { + {NPY_METH_resolve_descriptors, (void *)&quad_unary_op_resolve_descriptors}, + {NPY_METH_strided_loop, + (void *)&quad_generic_unary_op_strided_loop_aligned}, + {NPY_METH_unaligned_strided_loop, + (void *)&quad_generic_unary_op_strided_loop_unaligned}, + {0, NULL}}; + + PyArrayMethod_Spec Spec = { + .name = "quad_unary_op", + .nin = 1, + .nout = 1, + .casting = NPY_NO_CASTING, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = dtypes, + .slots = slots, + }; + + if (PyUFunc_AddLoopFromSpec(ufunc, &Spec) < 0) { + return -1; + } + + return 0; +} + +int +init_quad_unary_ops(PyObject *numpy) +{ + if (create_quad_unary_ufunc(numpy, "negative") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "positive") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "absolute") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "rint") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "trunc") < 0) { + return -1; + } + if 
(create_quad_unary_ufunc(numpy, "floor") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "ceil") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "sqrt") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "square") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "log") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "log2") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "log10") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "log1p") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "exp") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "exp2") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "sin") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "cos") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "tan") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "arcsin") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "arccos") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "arctan") < 0) { + return -1; + } + return 0; +} + +// Binary ufuncs + +static NPY_CASTING +quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], + PyArray_Descr *const given_descrs[], + PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED(view_offset)) +{ + QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0]; + QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1]; + QuadBackendType target_backend; + + // Determine target backend and if casting is needed + NPY_CASTING casting = NPY_NO_CASTING; + if (descr_in1->backend != descr_in2->backend) { + target_backend = BACKEND_LONGDOUBLE; + casting = NPY_SAFE_CASTING; + } + else { + target_backend = descr_in1->backend; + } + + // Set up input descriptors, casting if necessary + for (int i = 0; i < 2; i++) { + if (((QuadPrecDTypeObject *)given_descrs[i])->backend != target_backend) { + loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance(target_backend); + if (!loop_descrs[i]) { + return (NPY_CASTING)-1; + } + } + else { + Py_INCREF(given_descrs[i]); + loop_descrs[i] = given_descrs[i]; + } + } + + // Set up output descriptor + if (given_descrs[2] == NULL) { + loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(target_backend); + if (!loop_descrs[2]) { + return (NPY_CASTING)-1; + } + } + else { + QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)given_descrs[2]; + if (descr_out->backend != target_backend) { + loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(target_backend); + if (!loop_descrs[2]) { + return (NPY_CASTING)-1; + } + } + else { + Py_INCREF(given_descrs[2]); + loop_descrs[2] = given_descrs[2]; + } + } + return casting; +} + +template +int +quad_generic_binop_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *in1_ptr = data[0], *in2_ptr = data[1]; + char *out_ptr = data[2]; + npy_intp in1_stride = strides[0]; + npy_intp in2_stride = strides[1]; + npy_intp out_stride = strides[2]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + size_t elem_size = (backend == BACKEND_SLEEF) ? 
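+                /* only the active member of the quad_value union is copied:
+                   Sleef_quad is always 16 bytes, while sizeof(long double)
+                   varies by platform (16 on x86-64 SysV, 8 under MSVC) */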
sizeof(Sleef_quad) : sizeof(long double); + + quad_value in1, in2, out; + while (N--) { + memcpy(&in1, in1_ptr, elem_size); + memcpy(&in2, in2_ptr, elem_size); + if (backend == BACKEND_SLEEF) { + out.sleef_value = sleef_op(&in1.sleef_value, &in2.sleef_value); + } + else { + out.longdouble_value = longdouble_op(&in1.longdouble_value, &in2.longdouble_value); + } + memcpy(out_ptr, &out, elem_size); + + in1_ptr += in1_stride; + in2_ptr += in2_stride; + out_ptr += out_stride; + } + return 0; +} + +template +int +quad_generic_binop_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *in1_ptr = data[0], *in2_ptr = data[1]; + char *out_ptr = data[2]; + npy_intp in1_stride = strides[0]; + npy_intp in2_stride = strides[1]; + npy_intp out_stride = strides[2]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + + while (N--) { + if (backend == BACKEND_SLEEF) { + *(Sleef_quad *)out_ptr = sleef_op((Sleef_quad *)in1_ptr, (Sleef_quad *)in2_ptr); + } + else { + *(long double *)out_ptr = longdouble_op((long double *)in1_ptr, (long double *)in2_ptr); + } + + in1_ptr += in1_stride; + in2_ptr += in2_stride; + out_ptr += out_stride; + } + return 0; +} + +static int +quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], + PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) +{ + int nin = ufunc->nin; + int nargs = ufunc->nargs; + PyArray_DTypeMeta *common = NULL; + bool has_quad = false; + + // Handle the special case for reductions + if (op_dtypes[0] == NULL) { + assert(nin == 2 && ufunc->nout == 1); /* must be reduction */ + for (int i = 0; i < 3; i++) { + Py_INCREF(op_dtypes[1]); + new_op_dtypes[i] = op_dtypes[1]; + } + return 0; + } + + // Check if any input or signature is QuadPrecision + for (int i = 0; i < nin; i++) { + if (op_dtypes[i] == &QuadPrecDType) { + has_quad = true; + } + } + + if (has_quad) { + common = &QuadPrecDType; + } + else { + for (int i = nin; i < nargs; i++) { + if (signature[i] != NULL) { + if (common == NULL) { + Py_INCREF(signature[i]); + common = signature[i]; + } + else if (common != signature[i]) { + Py_CLEAR(common); // Not homogeneous, unset common + break; + } + } + } + } + // If no common output dtype, use standard promotion for inputs + if (common == NULL) { + common = PyArray_PromoteDTypeSequence(nin, op_dtypes); + if (common == NULL) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) { + PyErr_Clear(); // Do not propagate normal promotion errors + } + + return -1; + } + } + + // Set all new_op_dtypes to the common dtype + for (int i = 0; i < nargs; i++) { + if (signature[i]) { + // If signature is specified for this argument, use it + Py_INCREF(signature[i]); + new_op_dtypes[i] = signature[i]; + } + else { + // Otherwise, use the common dtype + Py_INCREF(common); + + new_op_dtypes[i] = common; + } + } + + Py_XDECREF(common); + + return 0; +} + +template +int +create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name) +{ + PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name); + if (ufunc == NULL) { + return -1; + } + + PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &QuadPrecDType}; + + PyType_Slot slots[] = { + {NPY_METH_resolve_descriptors, (void *)&quad_binary_op_resolve_descriptors}, + {NPY_METH_strided_loop, + (void *)&quad_generic_binop_strided_loop_aligned}, + {NPY_METH_unaligned_strided_loop, 
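+         /* memcpy-based fallback; NumPy falls back to this loop when it
+            cannot guarantee element-aligned operand buffers */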
+                 (void *)&quad_generic_binop_strided_loop_unaligned},
+            {0, NULL}};
+
+    PyArrayMethod_Spec Spec = {
+            .name = "quad_binop",
+            .nin = 2,
+            .nout = 1,
+            .casting = NPY_NO_CASTING,
+            .flags = (NPY_ARRAYMETHOD_FLAGS)(NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_IS_REORDERABLE),
+            .dtypes = dtypes,
+            .slots = slots,
+    };
+
+    if (PyUFunc_AddLoopFromSpec(ufunc, &Spec) < 0) {
+        return -1;
+    }
+
+    PyObject *promoter_capsule =
+            PyCapsule_New((void *)&quad_ufunc_promoter, "numpy._ufunc_promoter", NULL);
+    if (promoter_capsule == NULL) {
+        return -1;
+    }
+
+    PyObject *DTypes = PyTuple_Pack(3, &PyArrayDescr_Type, &PyArrayDescr_Type, &PyArrayDescr_Type);
+    if (DTypes == 0) {
+        Py_DECREF(promoter_capsule);
+        return -1;
+    }
+
+    if (PyUFunc_AddPromoter(ufunc, DTypes, promoter_capsule) < 0) {
+        Py_DECREF(promoter_capsule);
+        Py_DECREF(DTypes);
+        return -1;
+    }
+    Py_DECREF(promoter_capsule);
+    Py_DECREF(DTypes);
+    return 0;
+}
+
+int
+init_quad_binary_ops(PyObject *numpy)
+{
+    if (create_quad_binary_ufunc(numpy, "add") < 0) {
+        return -1;
+    }
+    if (create_quad_binary_ufunc(numpy, "subtract") < 0) {
+        return -1;
+    }
+    if (create_quad_binary_ufunc(numpy, "multiply") < 0) {
+        return -1;
+    }
+    if (create_quad_binary_ufunc(numpy, "divide") < 0) {
+        return -1;
+    }
+    if (create_quad_binary_ufunc(numpy, "power") < 0) {
+        return -1;
+    }
+    if (create_quad_binary_ufunc(numpy, "mod") < 0) {
+        return -1;
+    }
+    if (create_quad_binary_ufunc(numpy, "minimum") < 0) {
+        return -1;
+    }
+    if (create_quad_binary_ufunc(numpy, "maximum") < 0) {
+        return -1;
+    }
+    if (create_quad_binary_ufunc(numpy, "arctan2") < 0) {
+        return -1;
+    }
+    return 0;
+}
+
+// comparison functions
+
+static NPY_CASTING
+quad_comparison_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[],
+                                       PyArray_Descr *const given_descrs[],
+                                       PyArray_Descr *loop_descrs[],
+                                       npy_intp *NPY_UNUSED(view_offset))
+{
+    QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0];
+    QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1];
+    QuadBackendType target_backend;
+
+    // If the input backends differ, promote both operands to the long double
+    // backend; the output descriptor is boolean either way
+    NPY_CASTING casting = NPY_NO_CASTING;
+    if (descr_in1->backend != descr_in2->backend) {
+        target_backend = BACKEND_LONGDOUBLE;
+        casting = NPY_SAFE_CASTING;
+    }
+    else {
+        target_backend = descr_in1->backend;
+    }
+
+    // Set up input descriptors, casting if necessary
+    for (int i = 0; i < 2; i++) {
+        if (((QuadPrecDTypeObject *)given_descrs[i])->backend != target_backend) {
+            loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance(target_backend);
+            if (!loop_descrs[i]) {
+                return (NPY_CASTING)-1;
+            }
+        }
+        else {
+            Py_INCREF(given_descrs[i]);
+            loop_descrs[i] = given_descrs[i];
+        }
+    }
+
+    // Set up output descriptor
+    loop_descrs[2] = PyArray_DescrFromType(NPY_BOOL);
+    if (!loop_descrs[2]) {
+        return (NPY_CASTING)-1;
+    }
+    return casting;
+}
+
+template
+int
+quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[],
+                               npy_intp const dimensions[], npy_intp const strides[],
+                               NpyAuxData *auxdata)
+{
+    npy_intp N = dimensions[0];
+    char *in1_ptr = data[0], *in2_ptr = data[1];
+    char *out_ptr = data[2];
+    npy_intp in1_stride = strides[0];
+    npy_intp in2_stride = strides[1];
+    npy_intp out_stride = strides[2];
+
+    QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];
+    QuadBackendType backend = descr->backend;
+    size_t elem_size = (backend == BACKEND_SLEEF) ?
sizeof(Sleef_quad) : sizeof(long double); + + quad_value in1, in2; + while (N--) { + memcpy(&in1, in1_ptr, elem_size); + memcpy(&in2, in2_ptr, elem_size); + npy_bool result; + + if (backend == BACKEND_SLEEF) { + result = sleef_comp(&in1.sleef_value, &in2.sleef_value); + } + else { + result = ld_comp(&in1.longdouble_value, &in2.longdouble_value); + } + + memcpy(out_ptr, &result, sizeof(npy_bool)); + + in1_ptr += in1_stride; + in2_ptr += in2_stride; + out_ptr += out_stride; + } + return 0; +} + +template +int +quad_generic_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *in1_ptr = data[0], *in2_ptr = data[1]; + char *out_ptr = data[2]; + npy_intp in1_stride = strides[0]; + npy_intp in2_stride = strides[1]; + npy_intp out_stride = strides[2]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + while (N--) { + quad_value in1 = *(quad_value *)in1_ptr; + quad_value in2 = *(quad_value *)in2_ptr; + + npy_bool result; + + if (backend == BACKEND_SLEEF) { + result = sleef_comp(&in1.sleef_value, &in2.sleef_value); + } + else { + result = ld_comp(&in1.longdouble_value, &in2.longdouble_value); + } + + *(npy_bool *)out_ptr = result; + + in1_ptr += in1_stride; + in2_ptr += in2_stride; + out_ptr += out_stride; + } + return 0; +} + +NPY_NO_EXPORT int +comparison_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], + PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) +{ + PyArray_DTypeMeta *new_signature[NPY_MAXARGS]; + memcpy(new_signature, signature, 3 * sizeof(PyArray_DTypeMeta *)); + new_signature[2] = NULL; + int res = quad_ufunc_promoter(ufunc, op_dtypes, new_signature, new_op_dtypes); + if (res < 0) { + return -1; + } + Py_XSETREF(new_op_dtypes[2], &PyArray_BoolDType); + return 0; +} + +template +int +create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name) +{ + PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name); + if (ufunc == NULL) { + return -1; + } + + PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &PyArray_BoolDType}; + + PyType_Slot slots[] = { + {NPY_METH_resolve_descriptors, (void *)&quad_comparison_op_resolve_descriptors}, + {NPY_METH_strided_loop, + (void *)&quad_generic_comp_strided_loop_aligned}, + {NPY_METH_unaligned_strided_loop, + (void *)&quad_generic_comp_strided_loop}, + {0, NULL}}; + + PyArrayMethod_Spec Spec = { + .name = "quad_comp", + .nin = 2, + .nout = 1, + .casting = NPY_SAFE_CASTING, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = dtypes, + .slots = slots, + }; + + if (PyUFunc_AddLoopFromSpec(ufunc, &Spec) < 0) { + return -1; + } + + PyObject *promoter_capsule = + PyCapsule_New((void *)&comparison_ufunc_promoter, "numpy._ufunc_promoter", NULL); + if (promoter_capsule == NULL) { + return -1; + } + + PyObject *DTypes = PyTuple_Pack(3, &PyArrayDescr_Type, &PyArrayDescr_Type, &PyArray_BoolDType); + if (DTypes == 0) { + Py_DECREF(promoter_capsule); + return -1; + } + + if (PyUFunc_AddPromoter(ufunc, DTypes, promoter_capsule) < 0) { + Py_DECREF(promoter_capsule); + Py_DECREF(DTypes); + return -1; + } + Py_DECREF(promoter_capsule); + Py_DECREF(DTypes); + + return 0; +} + +int +init_quad_comps(PyObject *numpy) +{ + if (create_quad_comparison_ufunc(numpy, "equal") < 0) { + return -1; + } + if (create_quad_comparison_ufunc(numpy, "not_equal") < 0) { + return -1; + } + if 
(create_quad_comparison_ufunc(numpy, "less") < 0) { + return -1; + } + if (create_quad_comparison_ufunc(numpy, "less_equal") < 0) { + return -1; + } + if (create_quad_comparison_ufunc(numpy, "greater") < 0) { + return -1; + } + if (create_quad_comparison_ufunc(numpy, "greater_equal") < + 0) { + return -1; + } + + return 0; +} + +int +init_quad_umath(void) +{ + PyObject *numpy = PyImport_ImportModule("numpy"); + if (!numpy) { + PyErr_SetString(PyExc_ImportError, "Failed to import numpy module"); + return -1; + } + + if (init_quad_unary_ops(numpy) < 0) { + PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad unary operations"); + goto err; + } + + if (init_quad_binary_ops(numpy) < 0) { + PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad binary operations"); + goto err; + } + + if (init_quad_comps(numpy) < 0) { + PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad comparison operations"); + goto err; + } + + Py_DECREF(numpy); + return 0; + +err: + Py_DECREF(numpy); + return -1; +} \ No newline at end of file diff --git a/quaddtype/quaddtype/src/umath.h b/quaddtype/numpy_quaddtype/src/umath.h similarity index 100% rename from quaddtype/quaddtype/src/umath.h rename to quaddtype/numpy_quaddtype/src/umath.h diff --git a/quaddtype/pyproject.toml b/quaddtype/pyproject.toml index 68b61e7f..836b73f2 100644 --- a/quaddtype/pyproject.toml +++ b/quaddtype/pyproject.toml @@ -9,7 +9,7 @@ requires = [ build-backend = "mesonpy" [project] -name = "quaddtype" +name = "numpy_quaddtype" description = "Quad (128-bit) float dtype for numpy" version = "0.0.1" readme = 'README.md' @@ -22,4 +22,4 @@ dependencies = [ [project.optional-dependencies] test = [ "pytest", -] +] \ No newline at end of file diff --git a/quaddtype/quaddtype/__init__.py b/quaddtype/quaddtype/__init__.py deleted file mode 100644 index 9f246f73..00000000 --- a/quaddtype/quaddtype/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from ._quaddtype_main import QuadPrecDType, QuadPrecision \ No newline at end of file diff --git a/quaddtype/quaddtype/src/casts.cpp b/quaddtype/quaddtype/src/casts.cpp deleted file mode 100644 index c096fe5c..00000000 --- a/quaddtype/quaddtype/src/casts.cpp +++ /dev/null @@ -1,468 +0,0 @@ -#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API -#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API -#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION -#define NPY_TARGET_VERSION NPY_2_0_API_VERSION -#define NO_IMPORT_ARRAY -#define NO_IMPORT_UFUNC - -extern "C" { -#include - -#include "numpy/arrayobject.h" -#include "numpy/ndarraytypes.h" -#include "numpy/dtype_api.h" -} -#include "sleef.h" -#include "sleefquad.h" - -#include "scalar.h" -#include "casts.h" -#include "dtype.h" - -#define NUM_CASTS 29 // 14 to_casts + 14 from_casts + 1 quad_to_quad - -static NPY_CASTING -quad_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), - PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]), - QuadPrecDTypeObject *given_descrs[2], - QuadPrecDTypeObject *loop_descrs[2], npy_intp *view_offset) -{ - Py_INCREF(given_descrs[0]); - loop_descrs[0] = given_descrs[0]; - - if (given_descrs[1] == NULL) { - Py_INCREF(given_descrs[0]); - loop_descrs[1] = given_descrs[0]; - } - else { - Py_INCREF(given_descrs[1]); - loop_descrs[1] = given_descrs[1]; - } - - *view_offset = 0; - return NPY_NO_CASTING; -} - -static int -quad_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - void *NPY_UNUSED(auxdata)) -{ - npy_intp N = dimensions[0]; - char *in_ptr = 
data[0]; - char *out_ptr = data[1]; - - npy_intp in_stride = strides[0]; - npy_intp out_stride = strides[1]; - - while (N--) { - memcpy(out_ptr, in_ptr, sizeof(Sleef_quad)); - in_ptr += in_stride; - out_ptr += out_stride; - } - return 0; -} - -// Casting from other types to QuadDType - -template -static inline Sleef_quad -to_quad(T x); - -template <> -inline Sleef_quad -to_quad(npy_bool x) -{ - return x ? Sleef_cast_from_doubleq1(1.0) : Sleef_cast_from_doubleq1(0.0); -} -template <> -inline Sleef_quad -to_quad(npy_byte x) -{ - return Sleef_cast_from_int64q1(x); -} -// template <> -// inline Sleef_quad -// to_quad(npy_ubyte x) -// { -// return Sleef_cast_from_uint64q1(x); -// } -template <> -inline Sleef_quad -to_quad(npy_short x) -{ - return Sleef_cast_from_int64q1(x); -} -template <> -inline Sleef_quad -to_quad(npy_ushort x) -{ - return Sleef_cast_from_uint64q1(x); -} -template <> -inline Sleef_quad -to_quad(npy_int x) -{ - return Sleef_cast_from_int64q1(x); -} -template <> -inline Sleef_quad -to_quad(npy_uint x) -{ - return Sleef_cast_from_uint64q1(x); -} -template <> -inline Sleef_quad -to_quad(npy_long x) -{ - return Sleef_cast_from_int64q1(x); -} -template <> -inline Sleef_quad -to_quad(npy_ulong x) -{ - return Sleef_cast_from_uint64q1(x); -} -template <> -inline Sleef_quad -to_quad(npy_longlong x) -{ - return Sleef_cast_from_int64q1(x); -} -template <> -inline Sleef_quad -to_quad(npy_ulonglong x) -{ - return Sleef_cast_from_uint64q1(x); -} -template <> -inline Sleef_quad -to_quad(float x) -{ - return Sleef_cast_from_doubleq1(x); -} -template <> -inline Sleef_quad -to_quad(double x) -{ - return Sleef_cast_from_doubleq1(x); -} -template <> -inline Sleef_quad -to_quad(long double x) -{ - return Sleef_cast_from_doubleq1(x); -} - -template -static NPY_CASTING -numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2], - PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], - npy_intp *view_offset) -{ - if (given_descrs[1] == NULL) { - loop_descrs[1] = (PyArray_Descr *)new_quaddtype_instance(); - if (loop_descrs[1] == nullptr) { - return (NPY_CASTING)-1; - } - } - else { - Py_INCREF(given_descrs[1]); - loop_descrs[1] = given_descrs[1]; - } - - loop_descrs[0] = PyArray_GetDefaultDescr(dtypes[0]); - // *view_offset = 0; - return NPY_SAFE_CASTING; -} - -template -static int -numpy_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - void *NPY_UNUSED(auxdata)) -{ - npy_intp N = dimensions[0]; - char *in_ptr = data[0]; - char *out_ptr = data[1]; - - while (N--) { - T in_val; - Sleef_quad out_val; - - memcpy(&in_val, in_ptr, sizeof(T)); - out_val = to_quad(in_val); - memcpy(out_ptr, &out_val, sizeof(Sleef_quad)); - - in_ptr += strides[0]; - out_ptr += strides[1]; - } - return 0; -} - -// Casting from QuadDType to other types - -template -static inline T -from_quad(Sleef_quad x); - -template <> -inline npy_bool -from_quad(Sleef_quad x) -{ - return Sleef_cast_to_int64q1(x) != 0; -} -template <> -inline npy_byte -from_quad(Sleef_quad x) -{ - return (npy_byte)Sleef_cast_to_int64q1(x); -} -// template <> -// inline npy_ubyte -// from_quad(Sleef_quad x) -// { -// return (npy_ubyte)Sleef_cast_to_uint64q1(x); -// } -template <> -inline npy_short -from_quad(Sleef_quad x) -{ - return (npy_short)Sleef_cast_to_int64q1(x); -} -template <> -inline npy_ushort -from_quad(Sleef_quad x) -{ - return (npy_ushort)Sleef_cast_to_uint64q1(x); -} -template <> -inline npy_int -from_quad(Sleef_quad x) 
-{ - return (npy_int)Sleef_cast_to_int64q1(x); -} -template <> -inline npy_uint -from_quad(Sleef_quad x) -{ - return (npy_uint)Sleef_cast_to_uint64q1(x); -} -template <> -inline npy_long -from_quad(Sleef_quad x) -{ - return (npy_long)Sleef_cast_to_int64q1(x); -} -template <> -inline npy_ulong -from_quad(Sleef_quad x) -{ - return (npy_ulong)Sleef_cast_to_uint64q1(x); -} -template <> -inline npy_longlong -from_quad(Sleef_quad x) -{ - return Sleef_cast_to_int64q1(x); -} -template <> -inline npy_ulonglong -from_quad(Sleef_quad x) -{ - return Sleef_cast_to_uint64q1(x); -} -template <> -inline float -from_quad(Sleef_quad x) -{ - return Sleef_cast_to_doubleq1(x); -} -template <> -inline double -from_quad(Sleef_quad x) -{ - return Sleef_cast_to_doubleq1(x); -} -template <> -inline long double -from_quad(Sleef_quad x) -{ - return Sleef_cast_to_doubleq1(x); -} - -template -static NPY_CASTING -quad_to_numpy_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2], - PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], - npy_intp *view_offset) -{ - Py_INCREF(given_descrs[0]); - loop_descrs[0] = given_descrs[0]; - - loop_descrs[1] = PyArray_GetDefaultDescr(dtypes[1]); - // *view_offset = 0; - return NPY_UNSAFE_CASTING; -} - -template -static int -quad_to_numpy_strided_loop(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - void *NPY_UNUSED(auxdata)) -{ - npy_intp N = dimensions[0]; - char *in_ptr = data[0]; - char *out_ptr = data[1]; - - while (N--) { - Sleef_quad in_val = *(Sleef_quad *)in_ptr; - T *out_val = (T *)out_ptr; - *out_val = from_quad(in_val); - - in_ptr += strides[0]; - out_ptr += strides[1]; - } - return 0; -} - -static PyArrayMethod_Spec *specs[NUM_CASTS + 1]; // +1 for NULL terminator -static size_t spec_count = 0; - -void -add_spec(PyArrayMethod_Spec *spec) -{ - if (spec_count < NUM_CASTS) { - specs[spec_count++] = spec; - } -} - -// functions to add casts -template -void -add_cast_from(PyArray_DTypeMeta *to) -{ - PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{nullptr, to}; - - PyType_Slot *slots = new PyType_Slot[3]{ - {NPY_METH_resolve_descriptors, (void *)&quad_to_numpy_resolve_descriptors}, - {NPY_METH_strided_loop, (void *)&quad_to_numpy_strided_loop}, - {0, nullptr}}; - - PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{ - .name = "cast_QuadPrec_to_NumPy", - .nin = 1, - .nout = 1, - .casting = NPY_UNSAFE_CASTING, - .flags = (NPY_ARRAYMETHOD_FLAGS)0, - .dtypes = dtypes, - .slots = slots, - }; - add_spec(spec); -} - -template -void -add_cast_to(PyArray_DTypeMeta *from) -{ - PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{from, nullptr}; - - PyType_Slot *slots = new PyType_Slot[3]{ - {NPY_METH_resolve_descriptors, (void *)&numpy_to_quad_resolve_descriptors}, - {NPY_METH_strided_loop, (void *)&numpy_to_quad_strided_loop}, - {0, nullptr}}; - - PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{ - .name = "cast_NumPy_to_QuadPrec", - .nin = 1, - .nout = 1, - .casting = NPY_SAFE_CASTING, - .flags = (NPY_ARRAYMETHOD_FLAGS)0, - .dtypes = dtypes, - .slots = slots, - }; - - add_spec(spec); -} - -PyArrayMethod_Spec ** -init_casts_internal(void) -{ - PyArray_DTypeMeta **quad2quad_dtypes = new PyArray_DTypeMeta *[2]{nullptr, nullptr}; - PyType_Slot *quad2quad_slots = new PyType_Slot[4]{ - {NPY_METH_resolve_descriptors, (void *)&quad_to_quad_resolve_descriptors}, - {NPY_METH_strided_loop, (void *)&quad_to_quad_strided_loop}, - {NPY_METH_unaligned_strided_loop, (void 
*)&quad_to_quad_strided_loop}, - {0, nullptr}}; - - PyArrayMethod_Spec *quad2quad_spec = new PyArrayMethod_Spec{ - .name = "cast_QuadPrec_to_QuadPrec", - .nin = 1, - .nout = 1, - .casting = NPY_NO_CASTING, - .flags = NPY_METH_SUPPORTS_UNALIGNED, - .dtypes = quad2quad_dtypes, - .slots = quad2quad_slots, - }; - - add_spec(quad2quad_spec); - - add_cast_to(&PyArray_BoolDType); - add_cast_to(&PyArray_ByteDType); - add_cast_to(&PyArray_UByteDType); - add_cast_to(&PyArray_ShortDType); - add_cast_to(&PyArray_UShortDType); - add_cast_to(&PyArray_IntDType); - add_cast_to(&PyArray_UIntDType); - add_cast_to(&PyArray_LongDType); - add_cast_to(&PyArray_ULongDType); - add_cast_to(&PyArray_LongLongDType); - add_cast_to(&PyArray_ULongLongDType); - add_cast_to(&PyArray_FloatDType); - add_cast_to(&PyArray_DoubleDType); - add_cast_to(&PyArray_LongDoubleDType); - - add_cast_from(&PyArray_BoolDType); - add_cast_from(&PyArray_ByteDType); - add_cast_from(&PyArray_UByteDType); - add_cast_from(&PyArray_ShortDType); - add_cast_from(&PyArray_UShortDType); - add_cast_from(&PyArray_IntDType); - add_cast_from(&PyArray_UIntDType); - add_cast_from(&PyArray_LongDType); - add_cast_from(&PyArray_ULongDType); - add_cast_from(&PyArray_LongLongDType); - add_cast_from(&PyArray_ULongLongDType); - add_cast_from(&PyArray_FloatDType); - add_cast_from(&PyArray_DoubleDType); - add_cast_from(&PyArray_LongDoubleDType); - - specs[spec_count] = nullptr; - return specs; -} - -PyArrayMethod_Spec ** -init_casts(void) -{ - try { - return init_casts_internal(); - } - catch (int e) { - PyErr_NoMemory(); - return nullptr; - } -} - -void -free_casts(void) -{ - for (auto cast : specs) { - if (cast == nullptr) { - continue; - } - delete cast->dtypes; - delete cast->slots; - delete cast; - } - spec_count = 0; -} diff --git a/quaddtype/quaddtype/src/dtype.c b/quaddtype/quaddtype/src/dtype.c deleted file mode 100644 index a6f485bf..00000000 --- a/quaddtype/quaddtype/src/dtype.c +++ /dev/null @@ -1,216 +0,0 @@ -#include -#include -#include - -#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API -#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API -#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION -#define NPY_TARGET_VERSION NPY_2_0_API_VERSION -#define NO_IMPORT_ARRAY -#define NO_IMPORT_UFUNC -#include "numpy/arrayobject.h" -#include "numpy/ndarraytypes.h" -#include "numpy/dtype_api.h" - -#include "scalar.h" -#include "casts.h" -#include "dtype.h" - -static inline int quad_load(Sleef_quad *x, char *data_ptr) -{ - if (data_ptr == NULL || x == NULL) - { - return -1; - } - *x = *(Sleef_quad *)data_ptr; - return 0; -} - -static inline int quad_store(char *data_ptr, Sleef_quad x) -{ - if (data_ptr == NULL) - { - return -1; - } - *(Sleef_quad *)data_ptr = x; - return 0; -} - -QuadPrecDTypeObject * new_quaddtype_instance(void) -{ - QuadPrecDTypeObject *new = (QuadPrecDTypeObject *)PyArrayDescr_Type.tp_new((PyTypeObject *)&QuadPrecDType, NULL, NULL); - if (new == NULL) { - return NULL; - } - new->base.elsize = sizeof(Sleef_quad); - new->base.alignment = _Alignof(Sleef_quad); - - return new; -} - -static QuadPrecDTypeObject * ensure_canonical(QuadPrecDTypeObject *self) -{ - Py_INCREF(self); - return self; -} - -static QuadPrecDTypeObject * common_instance(QuadPrecDTypeObject *dtype1, QuadPrecDTypeObject *dtype2) -{ - Py_INCREF(dtype1); - return dtype1; -} - - -static PyArray_DTypeMeta * common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other) -{ - // Promote integer and floating-point types to QuadPrecDType - if (other->type_num >= 0 && - 
(PyTypeNum_ISINTEGER(other->type_num) || - PyTypeNum_ISFLOAT(other->type_num))) { - Py_INCREF(cls); - return cls; - } - // Don't promote complex types - if (PyTypeNum_ISCOMPLEX(other->type_num)) { - Py_INCREF(Py_NotImplemented); - return (PyArray_DTypeMeta *)Py_NotImplemented; - } - - Py_INCREF(Py_NotImplemented); - return (PyArray_DTypeMeta *)Py_NotImplemented; -} - -static PyArray_Descr * -quadprec_discover_descriptor_from_pyobject(PyArray_DTypeMeta *NPY_UNUSED(cls), PyObject *obj) -{ - if (Py_TYPE(obj) != &QuadPrecision_Type) - { - PyErr_SetString(PyExc_TypeError, "Can only store QuadPrecision in a QuadPrecDType array."); - return NULL; - } - return (PyArray_Descr *)new_quaddtype_instance(); -} - -static int quadprec_setitem(QuadPrecDTypeObject *descr, PyObject *obj, char *dataptr) -{ - QuadPrecisionObject *value; - if (PyObject_TypeCheck(obj, &QuadPrecision_Type)) - { - Py_INCREF(obj); - value = (QuadPrecisionObject *)obj; - } - else - { - value = QuadPrecision_from_object(obj); - if (value == NULL) { - return -1; - } - } - - if (quad_store(dataptr, value->quad.value) < 0) - { - Py_DECREF(value); - char error_msg[100]; - snprintf(error_msg, sizeof(error_msg), "Invalid memory location %p", (void*)dataptr); - PyErr_SetString(PyExc_ValueError, error_msg); - return -1; - } - - Py_DECREF(value); - return 0; -} - -static PyObject * quadprec_getitem(QuadPrecDTypeObject *descr, char *dataptr) -{ - QuadPrecisionObject *new = QuadPrecision_raw_new(); - if (!new) - { - return NULL; - } - if (quad_load(&new->quad.value, dataptr) < 0) - { - Py_DECREF(new); - char error_msg[100]; - snprintf(error_msg, sizeof(error_msg), "Invalid memory location %p", (void*)dataptr); - PyErr_SetString(PyExc_ValueError, error_msg); - return NULL; - } - return (PyObject *)new; -} - -static PyArray_Descr *quadprec_default_descr(PyArray_DTypeMeta *NPY_UNUSED(cls)) -{ - return (PyArray_Descr *)new_quaddtype_instance(); -} - -static PyType_Slot QuadPrecDType_Slots[] = -{ - {NPY_DT_ensure_canonical, &ensure_canonical}, - {NPY_DT_common_instance, &common_instance}, - {NPY_DT_common_dtype, &common_dtype}, - {NPY_DT_discover_descr_from_pyobject, &quadprec_discover_descriptor_from_pyobject}, - {NPY_DT_setitem, &quadprec_setitem}, - {NPY_DT_getitem, &quadprec_getitem}, - {NPY_DT_default_descr, &quadprec_default_descr}, - {0, NULL} -}; - - -static PyObject * QuadPrecDType_new(PyTypeObject *NPY_UNUSED(cls), PyObject *args, PyObject *kwds) -{ - if (PyTuple_GET_SIZE(args) != 0 || (kwds != NULL && PyDict_Size(kwds) != 0)) { - PyErr_SetString(PyExc_TypeError, - "QuadPrecDType takes no arguments"); - return NULL; - } - - return (PyObject *)new_quaddtype_instance(); -} - -static PyObject * QuadPrecDType_repr(QuadPrecDTypeObject *self) -{ - return PyUnicode_FromString("QuadPrecDType()"); -} - -PyArray_DTypeMeta QuadPrecDType = { - {{ - PyVarObject_HEAD_INIT(NULL, 0) - .tp_name = "QuadPrecDType.QuadPrecDType", - .tp_basicsize = sizeof(QuadPrecDTypeObject), - .tp_new = QuadPrecDType_new, - .tp_repr = (reprfunc)QuadPrecDType_repr, - .tp_str = (reprfunc)QuadPrecDType_repr, - }}, -}; - -int init_quadprec_dtype(void) -{ - PyArrayMethod_Spec **casts = init_casts(); - if (!casts) - return -1; - - PyArrayDTypeMeta_Spec QuadPrecDType_DTypeSpec = { - .flags = NPY_DT_NUMERIC, - .casts = casts, - .typeobj = &QuadPrecision_Type, - .slots = QuadPrecDType_Slots, - }; - - ((PyObject *)&QuadPrecDType)->ob_type = &PyArrayDTypeMeta_Type; - - ((PyTypeObject *)&QuadPrecDType)->tp_base = &PyArrayDescr_Type; - - if (PyType_Ready((PyTypeObject *)&QuadPrecDType) 
< 0) - { - return -1; - } - - if (PyArrayInitDTypeMeta_FromSpec(&QuadPrecDType, &QuadPrecDType_DTypeSpec) < 0) - { - return -1; - } - - free_casts(); - - return 0; -} \ No newline at end of file diff --git a/quaddtype/quaddtype/src/dtype.h b/quaddtype/quaddtype/src/dtype.h deleted file mode 100644 index 162d5714..00000000 --- a/quaddtype/quaddtype/src/dtype.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef _QUADDTYPE_DTYPE_H -#define _QUADDTYPE_DTYPE_H -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include -#include - -#include "scalar.h" - -typedef struct -{ - PyArray_Descr base; - -} QuadPrecDTypeObject; - -extern PyArray_DTypeMeta QuadPrecDType; - -QuadPrecDTypeObject * new_quaddtype_instance(void); - -int init_quadprec_dtype(void); - -#ifdef __cplusplus -} -#endif - -#endif \ No newline at end of file diff --git a/quaddtype/quaddtype/src/ops.hpp b/quaddtype/quaddtype/src/ops.hpp deleted file mode 100644 index 6a3511c4..00000000 --- a/quaddtype/quaddtype/src/ops.hpp +++ /dev/null @@ -1,207 +0,0 @@ -#include -#include - -typedef int (*unary_op_def)(Sleef_quad *, Sleef_quad *); - -static inline int -quad_negative(Sleef_quad *op, Sleef_quad *out) -{ - *out = Sleef_negq1(*op); - return 0; -} - -static int -quad_positive(Sleef_quad *op, Sleef_quad *out) -{ - *out = *op; - return 0; -} - -static inline int -quad_absolute(Sleef_quad *op, Sleef_quad *out) -{ - *out = Sleef_fabsq1(*op); - return 0; -} - -static inline int -quad_rint(Sleef_quad *op, Sleef_quad *out) -{ - *out = Sleef_rintq1(*op); - return 0; -} - -static inline int -quad_trunc(Sleef_quad *op, Sleef_quad *out) -{ - *out = Sleef_truncq1(*op); - return 0; -} - -static inline int -quad_floor(Sleef_quad *op, Sleef_quad *out) -{ - *out = Sleef_floorq1(*op); - return 0; -} - -static inline int -quad_ceil(Sleef_quad *op, Sleef_quad *out) -{ - *out = Sleef_ceilq1(*op); - return 0; -} - -static inline int -quad_sqrt(Sleef_quad *op, Sleef_quad *out) -{ - *out = Sleef_sqrtq1_u05(*op); - return 0; -} - -static inline int -quad_square(Sleef_quad *op, Sleef_quad *out) -{ - *out = Sleef_mulq1_u05(*op, *op); - return 0; -} - -static inline int -quad_log(Sleef_quad *op, Sleef_quad *out) -{ - *out = Sleef_logq1_u10(*op); - return 0; -} - -static inline int -quad_log2(Sleef_quad *op, Sleef_quad *out) -{ - *out = Sleef_log2q1_u10(*op); - return 0; -} - -static inline int -quad_log10(Sleef_quad *op, Sleef_quad *out) -{ - *out = Sleef_log10q1_u10(*op); - return 0; -} - -static inline int -quad_log1p(Sleef_quad *op, Sleef_quad *out) -{ - *out = Sleef_log1pq1_u10(*op); - return 0; -} - -static inline int -quad_exp(Sleef_quad *op, Sleef_quad *out) -{ - *out = Sleef_expq1_u10(*op); - return 0; -} - -static inline int -quad_exp2(Sleef_quad *op, Sleef_quad *out) -{ - *out = Sleef_exp2q1_u10(*op); - return 0; -} - -// binary ops -typedef int (*binop_def)(Sleef_quad *, Sleef_quad *, Sleef_quad *); - -static inline int -quad_add(Sleef_quad *out, Sleef_quad *in1, Sleef_quad *in2) -{ - *out = Sleef_addq1_u05(*in1, *in2); - return 0; -} - -static inline int -quad_sub(Sleef_quad *out, Sleef_quad *in1, Sleef_quad *in2) -{ - *out = Sleef_subq1_u05(*in1, *in2); - return 0; -} - -static inline int -quad_mul(Sleef_quad *res, Sleef_quad *a, Sleef_quad *b) -{ - *res = Sleef_mulq1_u05(*a, *b); - return 0; -} - -static inline int -quad_div(Sleef_quad *res, Sleef_quad *a, Sleef_quad *b) -{ - *res = Sleef_divq1_u05(*a, *b); - return 0; -} - -static inline int -quad_pow(Sleef_quad *res, Sleef_quad *a, Sleef_quad *b) -{ - *res = Sleef_powq1_u10(*a, *b); - return 0; 
-} - -static inline int -quad_mod(Sleef_quad *res, Sleef_quad *a, Sleef_quad *b) -{ - *res = Sleef_fmodq1(*a, *b); - return 0; -} - -static inline int -quad_minimum(Sleef_quad *out, Sleef_quad *in1, Sleef_quad *in2) -{ - *out = Sleef_icmpleq1(*in1, *in2) ? *in1 : *in2; - return 0; -} - -static inline int -quad_maximum(Sleef_quad *out, Sleef_quad *in1, Sleef_quad *in2) -{ - *out = Sleef_icmpgeq1(*in1, *in2) ? *in1 : *in2; - return 0; -} - -// comparison functions -typedef npy_bool (*cmp_def)(const Sleef_quad *, const Sleef_quad *); - -static inline npy_bool -quad_equal(const Sleef_quad *a, const Sleef_quad *b) -{ - return Sleef_icmpeqq1(*a, *b); -} - -static inline npy_bool -quad_notequal(const Sleef_quad *a, const Sleef_quad *b) -{ - return Sleef_icmpneq1(*a, *b); -} - -static inline npy_bool -quad_less(const Sleef_quad *a, const Sleef_quad *b) -{ - return Sleef_icmpltq1(*a, *b); -} - -static inline npy_bool -quad_lessequal(const Sleef_quad *a, const Sleef_quad *b) -{ - return Sleef_icmpleq1(*a, *b); -} - -static inline npy_bool -quad_greater(const Sleef_quad *a, const Sleef_quad *b) -{ - return Sleef_icmpgtq1(*a, *b); -} - -static inline npy_bool -quad_greaterequal(const Sleef_quad *a, const Sleef_quad *b) -{ - return Sleef_icmpgeq1(*a, *b); -} \ No newline at end of file diff --git a/quaddtype/quaddtype/src/quaddtype_main.c b/quaddtype/quaddtype/src/quaddtype_main.c deleted file mode 100644 index 098c0749..00000000 --- a/quaddtype/quaddtype/src/quaddtype_main.c +++ /dev/null @@ -1,55 +0,0 @@ -#include - -#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API -#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API -#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION -#define NPY_TARGET_VERSION NPY_2_0_API_VERSION - -#include "numpy/arrayobject.h" -#include "numpy/dtype_api.h" -#include "numpy/ufuncobject.h" - -#include "dtype.h" -#include "umath.h" - -static struct PyModuleDef moduledef = { - PyModuleDef_HEAD_INIT, - .m_name = "_quaddtype_main", - .m_doc = "Quad (128-bit) floating point Data Type for Numpy", - .m_size = -1, -}; - -PyMODINIT_FUNC -PyInit__quaddtype_main(void) -{ - import_array(); - import_umath(); - PyObject *m = PyModule_Create(&moduledef); - if (!m) - { - return NULL; - } - - if (init_quadprecision_scalar() < 0) - goto error; - - if(PyModule_AddObject(m, "QuadPrecision", (PyObject *)&QuadPrecision_Type) < 0) - goto error; - - if(init_quadprec_dtype() < 0) - goto error; - - if(PyModule_AddObject(m, "QuadPrecDType", (PyObject *)&QuadPrecDType) < 0) - goto error; - - if (init_quad_umath() < 0) { - goto error; - } - - return m; - - -error: - Py_DECREF(m); - return NULL; -} \ No newline at end of file diff --git a/quaddtype/quaddtype/src/scalar.c b/quaddtype/quaddtype/src/scalar.c deleted file mode 100644 index 15f727d8..00000000 --- a/quaddtype/quaddtype/src/scalar.c +++ /dev/null @@ -1,126 +0,0 @@ -#include -#include -#include - -#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API -#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION -#define NO_IMPORT_ARRAY - -#include "numpy/arrayobject.h" -#include "numpy/ndarraytypes.h" -#include "numpy/dtype_api.h" - -#include "scalar.h" -#include "scalar_ops.h" - - -// static PyTypeObject QuadPrecision_Type; - - -QuadPrecisionObject * QuadPrecision_raw_new(void) -{ - QuadPrecisionObject * new = PyObject_New(QuadPrecisionObject, &QuadPrecision_Type); - if(!new) - return NULL; - new->quad.value = Sleef_cast_from_doubleq1(0.0); // initialize to 0 - return new; -} - -QuadPrecisionObject * QuadPrecision_from_object(PyObject * value) -{ - 
QuadPrecisionObject *self = QuadPrecision_raw_new(); - if(!self) - return NULL; - - if(PyFloat_Check(value)) - self->quad.value = Sleef_cast_from_doubleq1(PyFloat_AsDouble(value)); - - else if(PyUnicode_CheckExact(value)) - { - const char * s = PyUnicode_AsUTF8(value); - char *endptr = NULL; - self->quad.value = Sleef_strtoq(s, &endptr); - if (*endptr != '\0' || endptr == s) - { - PyErr_SetString(PyExc_ValueError, "Unable to parse string to QuadPrecision"); - Py_DECREF(self); - return NULL; - } - } - else if(PyLong_Check(value)) - { - long int val = PyLong_AsLong(value); - if (val == -1) - { - PyErr_SetString(PyExc_OverflowError, "Overflow Error, value out of range"); - Py_DECREF(self); - return NULL; - } - self->quad.value = Sleef_cast_from_int64q1(val); - } - else - { - PyErr_SetString(PyExc_TypeError, "QuadPrecision value must be a float, int or string"); - Py_DECREF(self); - return NULL; - } - - return self; -} - -static PyObject * -QuadPrecision_new(PyTypeObject *cls, PyObject *args, PyObject *kwargs) -{ - PyObject *value; - - if (!PyArg_ParseTuple(args, "O", &value)) { - return NULL; - } - - return (PyObject *)QuadPrecision_from_object(value); -} - -static PyObject * QuadPrecision_str(QuadPrecisionObject * self) -{ - char buffer[128]; - Sleef_snprintf(buffer, sizeof(buffer), "%.*Qe", SLEEF_QUAD_DIG, self->quad.value); - return PyUnicode_FromString(buffer); -} - -static PyObject * QuadPrecision_repr(QuadPrecisionObject* self) -{ - PyObject *str = QuadPrecision_str(self); - if (str == NULL) { - return NULL; - } - PyObject *res = PyUnicode_FromFormat("QuadPrecision('%S')", str); - Py_DECREF(str); - return res; -} - -static void -quad_dealloc(QuadPrecDTypeObject *self) -{ - Py_TYPE(self)->tp_free((PyObject *)self); -} - -PyTypeObject QuadPrecision_Type = -{ - PyVarObject_HEAD_INIT(NULL, 0) - .tp_name = "QuadPrecType.QuadPrecision", - .tp_basicsize = sizeof(QuadPrecisionObject), - .tp_itemsize = 0, - .tp_new = QuadPrecision_new, - .tp_dealloc = (destructor)quad_dealloc, - .tp_repr = (reprfunc)QuadPrecision_repr, - .tp_str = (reprfunc)QuadPrecision_str, - .tp_as_number = &quad_as_scalar, - .tp_richcompare = (richcmpfunc)quad_richcompare - -}; - -int -init_quadprecision_scalar(void) -{ - return PyType_Ready(&QuadPrecision_Type); -} \ No newline at end of file diff --git a/quaddtype/quaddtype/src/scalar.h b/quaddtype/quaddtype/src/scalar.h deleted file mode 100644 index 962a1fec..00000000 --- a/quaddtype/quaddtype/src/scalar.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef _QUADDTYPE_SCALAR_H -#define _QUADDTYPE_SCALAR_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include - -typedef struct { - Sleef_quad value; -} quad_field; - -typedef struct { - PyObject_HEAD - quad_field quad; -} QuadPrecisionObject; - -extern PyTypeObject QuadPrecision_Type; - -QuadPrecisionObject * -QuadPrecision_raw_new(void); - -QuadPrecisionObject * -QuadPrecision_from_object(PyObject *value); - -int -init_quadprecision_scalar(void); - -#ifdef __cplusplus -} -#endif - -#endif \ No newline at end of file diff --git a/quaddtype/quaddtype/src/scalar_ops.cpp b/quaddtype/quaddtype/src/scalar_ops.cpp deleted file mode 100644 index a51bc69a..00000000 --- a/quaddtype/quaddtype/src/scalar_ops.cpp +++ /dev/null @@ -1,171 +0,0 @@ -#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API -#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION -#define NPY_TARGET_VERSION NPY_2_0_API_VERSION -#define NO_IMPORT_ARRAY - -extern "C" { -#include - -#include "numpy/arrayobject.h" -#include "numpy/ndarraytypes.h" -#include 
"numpy/ufuncobject.h" - -#include "numpy/dtype_api.h" -} - -#include "scalar.h" -#include "ops.hpp" -#include "scalar_ops.h" - -template -static PyObject * -quad_unary_func(QuadPrecisionObject *self) -{ - QuadPrecisionObject *res = QuadPrecision_raw_new(); - if (!res) { - return NULL; - } - - unary_op(&self->quad.value, &res->quad.value); - return (PyObject *)res; -} - -PyObject * -quad_nonzero(QuadPrecisionObject *self) -{ - return PyBool_FromLong(Sleef_icmpneq1(self->quad.value, Sleef_cast_from_int64q1(0))); -} - -template -static PyObject * -quad_binary_func(PyObject *op1, PyObject *op2) -{ - QuadPrecisionObject *self; - PyObject *other; - QuadPrecisionObject *other_quad = NULL; - int is_forward; - - if (PyObject_TypeCheck(op1, &QuadPrecision_Type)) { - is_forward = 1; - self = (QuadPrecisionObject *)op1; - other = Py_NewRef(op2); - } - else { - is_forward = 0; - self = (QuadPrecisionObject *)op2; - other = Py_NewRef(op1); - } - - if (PyObject_TypeCheck(other, &QuadPrecision_Type)) { - Py_INCREF(other); - other_quad = (QuadPrecisionObject *)other; - } - else if (PyLong_Check(other) || PyFloat_Check(other)) { - other_quad = QuadPrecision_raw_new(); - if (!other_quad) { - Py_DECREF(other); - return NULL; - } - - if (PyLong_Check(other)) { - long long value = PyLong_AsLongLong(other); - if (value == -1 && PyErr_Occurred()) { - Py_DECREF(other); - Py_DECREF(other_quad); - return NULL; - } - other_quad->quad.value = Sleef_cast_from_int64q1(value); - } - else { - double value = PyFloat_AsDouble(other); - if (value == -1.0 && PyErr_Occurred()) { - Py_DECREF(other); - Py_DECREF(other_quad); - return NULL; - } - other_quad->quad.value = Sleef_cast_from_doubleq1(value); - } - } - else { - Py_DECREF(other); - Py_RETURN_NOTIMPLEMENTED; - } - - QuadPrecisionObject *res = QuadPrecision_raw_new(); - if (!res) { - Py_DECREF(other_quad); - Py_DECREF(other); - return NULL; - } - - if (is_forward) { - binary_op(&res->quad.value, &self->quad.value, &other_quad->quad.value); - } - else { - binary_op(&res->quad.value, &other_quad->quad.value, &self->quad.value); - } - - Py_DECREF(other_quad); - Py_DECREF(other); - return (PyObject *)res; -} - -// todo: add support with float and int -PyObject * -quad_richcompare(QuadPrecisionObject *self, PyObject *other, int cmp_op) -{ - QuadPrecisionObject *other_quad = NULL; - - if (PyObject_TypeCheck(other, &QuadPrecision_Type)) { - Py_INCREF(other); - other_quad = (QuadPrecisionObject *)other; - } - else if (PyLong_CheckExact(other) || PyFloat_CheckExact(other)) { - other_quad = QuadPrecision_from_object(other); - if (other_quad == NULL) { - return NULL; - } - } - else { - Py_RETURN_NOTIMPLEMENTED; - } - int cmp; - switch (cmp_op) { - case Py_LT: - cmp = Sleef_icmpltq1(self->quad.value, other_quad->quad.value); - break; - case Py_LE: - cmp = Sleef_icmpleq1(self->quad.value, other_quad->quad.value); - break; - case Py_EQ: - cmp = Sleef_icmpeqq1(self->quad.value, other_quad->quad.value); - break; - case Py_NE: - cmp = Sleef_icmpneq1(self->quad.value, other_quad->quad.value); - break; - case Py_GT: - cmp = Sleef_icmpgtq1(self->quad.value, other_quad->quad.value); - break; - case Py_GE: - cmp = Sleef_icmpgeq1(self->quad.value, other_quad->quad.value); - break; - default: - Py_DECREF(other_quad); - Py_RETURN_NOTIMPLEMENTED; - } - Py_DECREF(other_quad); - - return PyBool_FromLong(cmp); -} - -PyNumberMethods quad_as_scalar = { - .nb_add = (binaryfunc)quad_binary_func, - .nb_subtract = (binaryfunc)quad_binary_func, - .nb_multiply = (binaryfunc)quad_binary_func, - .nb_power = 
(ternaryfunc)quad_binary_func, - .nb_negative = (unaryfunc)quad_unary_func, - .nb_positive = (unaryfunc)quad_unary_func, - .nb_absolute = (unaryfunc)quad_unary_func, - .nb_bool = (inquiry)quad_nonzero, - .nb_true_divide = (binaryfunc)quad_binary_func, -}; \ No newline at end of file diff --git a/quaddtype/quaddtype/src/umath.cpp b/quaddtype/quaddtype/src/umath.cpp deleted file mode 100644 index 9007f890..00000000 --- a/quaddtype/quaddtype/src/umath.cpp +++ /dev/null @@ -1,576 +0,0 @@ -#include "scalar.h" - -#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API -#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API -#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION -#define NPY_TARGET_VERSION NPY_2_0_API_VERSION -#define NO_IMPORT_ARRAY -#define NO_IMPORT_UFUNC - -extern "C" { -#include -#include - -#include "numpy/arrayobject.h" -#include "numpy/ndarraytypes.h" -#include "numpy/ufuncobject.h" - -#include "numpy/dtype_api.h" -} -#include "dtype.h" -#include "umath.h" -#include "ops.hpp" - -template -int -quad_generic_unary_op_strided_loop(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *auxdata) -{ - npy_intp N = dimensions[0]; - char *in_ptr = data[0]; - char *out_ptr = data[1]; - npy_intp in_stride = strides[0]; - npy_intp out_stride = strides[1]; - - Sleef_quad in, out; - while (N--) { - memcpy(&in, in_ptr, sizeof(Sleef_quad)); - unary_op(&in, &out); - memcpy(out_ptr, &out, sizeof(Sleef_quad)); - - in_ptr += in_stride; - out_ptr += out_stride; - } - return 0; -} - -static NPY_CASTING -quad_unary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], - PyArray_Descr *const given_descrs[], PyArray_Descr *loop_descrs[], - npy_intp *NPY_UNUSED(view_offset)) -{ - Py_INCREF(given_descrs[0]); - loop_descrs[0] = given_descrs[0]; - - if (given_descrs[1] == NULL) { - Py_INCREF(given_descrs[0]); - loop_descrs[1] = given_descrs[0]; - return NPY_NO_CASTING; - } - Py_INCREF(given_descrs[1]); - loop_descrs[1] = given_descrs[1]; - - return NPY_NO_CASTING; -} - -template -int -create_quad_unary_ufunc(PyObject *numpy, const char *ufunc_name) -{ - PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name); - if (ufunc == NULL) { - return -1; - } - - PyArray_DTypeMeta *dtypes[2] = {&QuadPrecDType, &QuadPrecDType}; - - PyType_Slot slots[] = { - {NPY_METH_resolve_descriptors, (void *)&quad_unary_op_resolve_descriptors}, - {NPY_METH_strided_loop, (void *)&quad_generic_unary_op_strided_loop}, - {0, NULL}}; - - PyArrayMethod_Spec Spec = { - .name = "quad_unary_op", - .nin = 1, - .nout = 1, - .casting = NPY_NO_CASTING, - .flags = (NPY_ARRAYMETHOD_FLAGS)0, - .dtypes = dtypes, - .slots = slots, - }; - - if (PyUFunc_AddLoopFromSpec(ufunc, &Spec) < 0) { - return -1; - } - - return 0; -} - -int -init_quad_unary_ops(PyObject *numpy) -{ - if (create_quad_unary_ufunc(numpy, "negative") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "absolute") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "rint") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "trunc") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "floor") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "ceil") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "sqrt") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "square") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "log") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "log2") < 0) { - return -1; - } - 
if (create_quad_unary_ufunc(numpy, "log10") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "log1p") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "exp") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "exp2") < 0) { - return -1; - } - return 0; -} - -// Binary ufuncs - -template -int -quad_generic_binop_strided_loop(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *auxdata) -{ - npy_intp N = dimensions[0]; - char *in1_ptr = data[0], *in2_ptr = data[1]; - char *out_ptr = data[2]; - npy_intp in1_stride = strides[0]; - npy_intp in2_stride = strides[1]; - npy_intp out_stride = strides[2]; - - Sleef_quad in1, in2, out; - while (N--) { - memcpy(&in1, in1_ptr, sizeof(Sleef_quad)); - memcpy(&in2, in2_ptr, sizeof(Sleef_quad)); - binop(&out, &in1, &in2); - memcpy(out_ptr, &out, sizeof(Sleef_quad)); - - in1_ptr += in1_stride; - in2_ptr += in2_stride; - out_ptr += out_stride; - } - return 0; -} - -static NPY_CASTING -quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], - PyArray_Descr *const given_descrs[], - PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED(view_offset)) -{ - Py_INCREF(given_descrs[0]); - loop_descrs[0] = given_descrs[0]; - Py_INCREF(given_descrs[1]); - loop_descrs[1] = given_descrs[1]; - - if (given_descrs[2] == NULL) { - PyArray_Descr *out_descr = (PyArray_Descr *)new_quaddtype_instance(); - if (!out_descr) { - return (NPY_CASTING)-1; - } - Py_INCREF(given_descrs[0]); - loop_descrs[2] = out_descr; - } - else { - Py_INCREF(given_descrs[2]); - loop_descrs[2] = given_descrs[2]; - } - - return NPY_NO_CASTING; -} - -// helper debugging function -static const char * -get_dtype_name(PyArray_DTypeMeta *dtype) -{ - if (dtype == &QuadPrecDType) { - return "QuadPrecDType"; - } - else if (dtype == &PyArray_BoolDType) { - return "BoolDType"; - } - else if (dtype == &PyArray_ByteDType) { - return "ByteDType"; - } - else if (dtype == &PyArray_UByteDType) { - return "UByteDType"; - } - else if (dtype == &PyArray_ShortDType) { - return "ShortDType"; - } - else if (dtype == &PyArray_UShortDType) { - return "UShortDType"; - } - else if (dtype == &PyArray_IntDType) { - return "IntDType"; - } - else if (dtype == &PyArray_UIntDType) { - return "UIntDType"; - } - else if (dtype == &PyArray_LongDType) { - return "LongDType"; - } - else if (dtype == &PyArray_ULongDType) { - return "ULongDType"; - } - else if (dtype == &PyArray_LongLongDType) { - return "LongLongDType"; - } - else if (dtype == &PyArray_ULongLongDType) { - return "ULongLongDType"; - } - else if (dtype == &PyArray_FloatDType) { - return "FloatDType"; - } - else if (dtype == &PyArray_DoubleDType) { - return "DoubleDType"; - } - else if (dtype == &PyArray_LongDoubleDType) { - return "LongDoubleDType"; - } - else { - return "UnknownDType"; - } -} - -static int -quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], - PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) -{ - // printf("quad_ufunc_promoter called for ufunc: %s\n", ufunc->name); - // printf("Entering quad_ufunc_promoter\n"); - // printf("Ufunc name: %s\n", ufunc->name); - // printf("nin: %d, nargs: %d\n", ufunc->nin, ufunc->nargs); - - int nin = ufunc->nin; - int nargs = ufunc->nargs; - PyArray_DTypeMeta *common = NULL; - bool has_quad = false; - - // Handle the special case for reductions - if (op_dtypes[0] == NULL) { - assert(nin == 2 && ufunc->nout == 1); /* must be reduction */ - for (int i = 0; i 
-static int
-quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
-                    PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[])
-{
-    // printf("quad_ufunc_promoter called for ufunc: %s\n", ufunc->name);
-    // printf("Entering quad_ufunc_promoter\n");
-    // printf("Ufunc name: %s\n", ufunc->name);
-    // printf("nin: %d, nargs: %d\n", ufunc->nin, ufunc->nargs);
-
-    int nin = ufunc->nin;
-    int nargs = ufunc->nargs;
-    PyArray_DTypeMeta *common = NULL;
-    bool has_quad = false;
-
-    // Handle the special case for reductions
-    if (op_dtypes[0] == NULL) {
-        assert(nin == 2 && ufunc->nout == 1); /* must be reduction */
-        for (int i = 0; i < 3; i++) {
-            Py_INCREF(op_dtypes[1]);
-            new_op_dtypes[i] = op_dtypes[1];
-            // printf("new_op_dtypes[%d] set to %s\n", i, get_dtype_name(new_op_dtypes[i]));
-        }
-        return 0;
-    }
-
-    // Check if any input or signature is QuadPrecision
-    for (int i = 0; i < nargs; i++) {
-        if ((i < nin && op_dtypes[i] == &QuadPrecDType) || (signature[i] == &QuadPrecDType)) {
-            has_quad = true;
-            // printf("QuadPrecision detected in input %d or signature\n", i);
-            break;
-        }
-    }
-
-    if (has_quad) {
-        // If QuadPrecision is involved, use it for all arguments
-        common = &QuadPrecDType;
-        // printf("Using QuadPrecDType as common type\n");
-    }
-    else {
-        // Check if output signature is homogeneous
-        for (int i = nin; i < nargs; i++) {
-            if (signature[i] != NULL) {
-                if (common == NULL) {
-                    Py_INCREF(signature[i]);
-                    common = signature[i];
-                    // printf("Common type set to %s from signature\n", get_dtype_name(common));
-                }
-                else if (common != signature[i]) {
-                    Py_CLEAR(common);  // Not homogeneous, unset common
-                    // printf("Output signature not homogeneous, cleared common type\n");
-                    break;
-                }
-            }
-        }
-
-        // If no common output dtype, use standard promotion for inputs
-        if (common == NULL) {
-            // printf("Using standard promotion for inputs\n");
-            common = PyArray_PromoteDTypeSequence(nin, op_dtypes);
-            if (common == NULL) {
-                if (PyErr_ExceptionMatches(PyExc_TypeError)) {
-                    PyErr_Clear();  // Do not propagate normal promotion errors
-                }
-                // printf("Exiting quad_ufunc_promoter (promotion failed)\n");
-                return -1;
-            }
-            // printf("Common type after promotion: %s\n", get_dtype_name(common));
-        }
-    }
-
-    // Set all new_op_dtypes to the common dtype
-    for (int i = 0; i < nargs; i++) {
-        if (signature[i]) {
-            // If signature is specified for this argument, use it
-            Py_INCREF(signature[i]);
-            new_op_dtypes[i] = signature[i];
-            // printf("new_op_dtypes[%d] set to %s (from signature)\n", i,
-            //        get_dtype_name(new_op_dtypes[i]));
-        }
-        else {
-            // Otherwise, use the common dtype
-            Py_INCREF(common);
-            new_op_dtypes[i] = common;
-            // printf("new_op_dtypes[%d] set to %s (from common)\n", i,
-            //        get_dtype_name(new_op_dtypes[i]));
-        }
-    }
-
-    Py_XDECREF(common);
-    // printf("Exiting quad_ufunc_promoter\n");
-    return 0;
-}
-
-template <binop_def binop>
-int
-create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name)
-{
-    PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name);
-    if (ufunc == NULL) {
-        Py_DecRef(ufunc);
-        return -1;
-    }
-
-    PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &QuadPrecDType};
-
-    PyType_Slot slots[] = {
-            {NPY_METH_resolve_descriptors, (void *)&quad_binary_op_resolve_descriptors},
-            {NPY_METH_strided_loop, (void *)&quad_generic_binop_strided_loop<binop>},
-            {0, NULL}};
-
-    PyArrayMethod_Spec Spec = {
-            .name = "quad_binop",
-            .nin = 2,
-            .nout = 1,
-            .casting = NPY_NO_CASTING,
-            .flags = (NPY_ARRAYMETHOD_FLAGS)0,
-            .dtypes = dtypes,
-            .slots = slots,
-    };
-
-    if (PyUFunc_AddLoopFromSpec(ufunc, &Spec) < 0) {
-        return -1;
-    }
-
-    PyObject *promoter_capsule =
-            PyCapsule_New((void *)&quad_ufunc_promoter, "numpy._ufunc_promoter", NULL);
-    if (promoter_capsule == NULL) {
-        return -1;
-    }
-
-    PyObject *DTypes = PyTuple_Pack(3, &PyArrayDescr_Type, &PyArrayDescr_Type, &PyArrayDescr_Type);
-    if (DTypes == 0) {
-        Py_DECREF(promoter_capsule);
-        return -1;
-    }
-
-    if (PyUFunc_AddPromoter(ufunc, DTypes, promoter_capsule) < 0) {
-        Py_DECREF(promoter_capsule);
-        Py_DECREF(DTypes);
-        return -1;
-    }
-    Py_DECREF(promoter_capsule);
-    Py_DECREF(DTypes);
-    return 0;
-}
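The reduction branch at the top of `quad_ufunc_promoter` above (the `op_dtypes[0] == NULL` case) pins all three reduction operands to the dtype of the array being reduced, which is what lets whole-array reductions run in quad precision. A sketch of the expected behavior, again assuming the installed package:

```python
import numpy as np
from numpy_quaddtype import QuadPrecDType

arr = np.array([1, 2, 3, 4], dtype=QuadPrecDType())

# np.add.reduce reaches the promoter with op_dtypes[0] == NULL; the
# reduction branch copies the array dtype into all three slots, so the
# accumulation itself should happen in quad precision.
print(np.add.reduce(arr))
```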
-int
-init_quad_binary_ops(PyObject *numpy)
-{
-    if (create_quad_binary_ufunc<quad_add>(numpy, "add") < 0) {
-        return -1;
-    }
-    if (create_quad_binary_ufunc<quad_sub>(numpy, "subtract") < 0) {
-        return -1;
-    }
-    if (create_quad_binary_ufunc<quad_mul>(numpy, "multiply") < 0) {
-        return -1;
-    }
-    if (create_quad_binary_ufunc<quad_div>(numpy, "divide") < 0) {
-        return -1;
-    }
-    if (create_quad_binary_ufunc<quad_pow>(numpy, "power") < 0) {
-        return -1;
-    }
-    if (create_quad_binary_ufunc<quad_mod>(numpy, "mod") < 0) {
-        return -1;
-    }
-    if (create_quad_binary_ufunc<quad_minimum>(numpy, "minimum") < 0) {
-        return -1;
-    }
-    if (create_quad_binary_ufunc<quad_maximum>(numpy, "maximum") < 0) {
-        return -1;
-    }
-    return 0;
-}
-
-// comparison functions
-
-template <cmp_def comp>
-int
-quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[],
-                               npy_intp const dimensions[], npy_intp const strides[],
-                               NpyAuxData *auxdata)
-{
-    npy_intp N = dimensions[0];
-    char *in1_ptr = data[0], *in2_ptr = data[1];
-    char *out_ptr = data[2];
-    npy_intp in1_stride = strides[0];
-    npy_intp in2_stride = strides[1];
-    npy_intp out_stride = strides[2];
-
-    while (N--) {
-        *((npy_bool *)out_ptr) = comp((Sleef_quad *)in1_ptr, (Sleef_quad *)in2_ptr);
-
-        in1_ptr += in1_stride;
-        in2_ptr += in2_stride;
-        out_ptr += out_stride;
-    }
-    return 0;
-}
-
-NPY_NO_EXPORT int
-comparison_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
-                          PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[])
-{
-    PyArray_DTypeMeta *new_signature[NPY_MAXARGS];
-
-    memcpy(new_signature, signature, 3 * sizeof(PyArray_DTypeMeta *));
-    new_signature[2] = NULL;
-    int res = quad_ufunc_promoter(ufunc, op_dtypes, new_signature, new_op_dtypes);
-    if (res < 0) {
-        return -1;
-    }
-    Py_XSETREF(new_op_dtypes[2], &PyArray_BoolDType);
-    return 0;
-}
-
-template <cmp_def comp>
-int
-create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name)
-{
-    PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name);
-    if (ufunc == NULL) {
-        return -1;
-    }
-
-    PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &PyArray_BoolDType};
-
-    PyType_Slot slots[] = {{NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop<comp>},
-                           {0, NULL}};
-
-    PyArrayMethod_Spec Spec = {
-            .name = "quad_comp",
-            .nin = 2,
-            .nout = 1,
-            .casting = NPY_NO_CASTING,
-            .flags = (NPY_ARRAYMETHOD_FLAGS)0,
-            .dtypes = dtypes,
-            .slots = slots,
-    };
-
-    if (PyUFunc_AddLoopFromSpec(ufunc, &Spec) < 0) {
-        return -1;
-    }
-
-    PyObject *promoter_capsule =
-            PyCapsule_New((void *)&comparison_ufunc_promoter, "numpy._ufunc_promoter", NULL);
-    if (promoter_capsule == NULL) {
-        return -1;
-    }
-
-    PyObject *DTypes = PyTuple_Pack(3, &PyArrayDescr_Type, &PyArrayDescr_Type, &PyArray_BoolDType);
-    if (DTypes == 0) {
-        Py_DECREF(promoter_capsule);
-        return -1;
-    }
-
-    if (PyUFunc_AddPromoter(ufunc, DTypes, promoter_capsule) < 0) {
-        Py_DECREF(promoter_capsule);
-        Py_DECREF(DTypes);
-        return -1;
-    }
-    Py_DECREF(promoter_capsule);
-    Py_DECREF(DTypes);
-
-    return 0;
-}
-
-int
-init_quad_comps(PyObject *numpy)
-{
-    if (create_quad_comparison_ufunc<quad_equal>(numpy, "equal") < 0) {
-        return -1;
-    }
-    if (create_quad_comparison_ufunc<quad_notequal>(numpy, "not_equal") < 0) {
-        return -1;
-    }
-    if (create_quad_comparison_ufunc<quad_less>(numpy, "less") < 0) {
-        return -1;
-    }
-    if (create_quad_comparison_ufunc<quad_lessequal>(numpy, "less_equal") < 0) {
-        return -1;
-    }
-    if (create_quad_comparison_ufunc<quad_greater>(numpy, "greater") < 0) {
-        return -1;
-    }
-    if (create_quad_comparison_ufunc<quad_greaterequal>(numpy, "greater_equal") < 0) {
-        return -1;
-    }
-
-    return 0;
-}
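Because `comparison_ufunc_promoter` above forces the output operand to `PyArray_BoolDType`, comparisons on quad arrays come back as ordinary boolean arrays rather than quad-valued ones. A minimal sketch, assuming the installed package:

```python
import numpy as np
from numpy_quaddtype import QuadPrecDType

a = np.array([1.5, 2.5, 3.5], dtype=QuadPrecDType())
b = np.array([2.5, 2.5, 2.5], dtype=QuadPrecDType())

# The comparison loops write npy_bool outputs, so the result is a
# plain numpy bool array usable directly as a mask.
mask = np.less(a, b)
print(mask.dtype)  # bool
print(a[mask])
```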
-int
-init_quad_umath(void)
-{
-    PyObject *numpy = PyImport_ImportModule("numpy");
-    if (!numpy)
-        return -1;
-
-    if (init_quad_unary_ops(numpy) < 0) {
-        goto err;
-    }
-
-    if (init_quad_binary_ops(numpy) < 0) {
-        goto err;
-    }
-
-    if (init_quad_comps(numpy) < 0) {
-        goto err;
-    }
-
-    Py_DECREF(numpy);
-    return 0;
-
-err:
-    Py_DECREF(numpy);
-    return -1;
-}
\ No newline at end of file
diff --git a/quaddtype/reinstall.sh b/quaddtype/reinstall.sh
index 31479511..5131f15a 100755
--- a/quaddtype/reinstall.sh
+++ b/quaddtype/reinstall.sh
@@ -8,6 +8,6 @@ then
 fi
 
 #meson setup build -Db_sanitize=address,undefined
-python -m pip uninstall -y quaddtype
-python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v' -Csetup-args="-Dbuildtype=debug"
-#python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v'
\ No newline at end of file
+python -m pip uninstall -y numpy_quaddtype
+# python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v' -Csetup-args="-Dbuildtype=debug"
+python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v'
\ No newline at end of file
diff --git a/quaddtype/tests/test_quaddtype.py b/quaddtype/tests/test_quaddtype.py
index 20ccd91c..11cf7671 100644
--- a/quaddtype/tests/test_quaddtype.py
+++ b/quaddtype/tests/test_quaddtype.py
@@ -3,7 +3,7 @@
 import numpy as np
 import operator
 
-from quaddtype import QuadPrecDType, QuadPrecision
+from numpy_quaddtype import QuadPrecDType, QuadPrecision
 
 
 def test_create_scalar_simple():
@@ -17,12 +17,6 @@ def test_basic_equality():
         "12.0") == QuadPrecision("12.00")
 
 
-@pytest.mark.parametrize("val", ["123532.543", "12893283.5"])
-def test_scalar_repr(val):
-    expected = f"QuadPrecision('{str(QuadPrecision(val))}')"
-    assert repr(QuadPrecision(val)) == expected
-
-
 @pytest.mark.parametrize("op", ["add", "sub", "mul", "truediv", "pow"])
 @pytest.mark.parametrize("other", ["3.0", "12.5", "100.0"])
 def test_binary_ops(op, other):