From 0a36a9d70d9e79f5ad07dddb68ebffaadcb616d1 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Sun, 25 Aug 2024 12:02:33 +0530
Subject: [PATCH 01/32] added readme

---
 quaddtype/README.md | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/quaddtype/README.md b/quaddtype/README.md
index e69de29b..97425387 100644
--- a/quaddtype/README.md
+++ b/quaddtype/README.md
@@ -0,0 +1,17 @@
+# Numpy-QuadDType
+
+## Installation
+
+```
+pip install numpy==2.1.0
+pip install -i https://test.pypi.org/simple/ quaddtype
+```
+
+## Usage
+
+```
+import numpy as np
+from quaddtype import QuadPrecDType, QuadPrecision
+
+np.array([1,2,3], dtype=QuadPrecDType())
+```

From d56e59278b3dceabb6cc9bd373872d9f392f52ed Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Sat, 31 Aug 2024 22:37:00 +0530
Subject: [PATCH 02/32] renaming module and adding initial longdouble backend
 support

---
 quaddtype/meson.build                         |  38 +-
 quaddtype/numpy_quaddtype/__init__.py         |  23 +
 quaddtype/numpy_quaddtype/src/casts.cpp       | 477 ++++++++++++++++++
 .../src/casts.h                               |   0
 quaddtype/numpy_quaddtype/src/dtype.c         | 241 +++++++++
 .../src/dtype.h                               |  18 +-
 .../src/ops.hpp                               |   0
 quaddtype/numpy_quaddtype/src/quad_common.h   |  17 +
 .../src/quaddtype_main.c                      |  26 +-
 quaddtype/numpy_quaddtype/src/scalar.c        | 159 ++++++
 .../src/scalar.h                              |  15 +-
 .../src/scalar_ops.cpp                        |   0
 .../src/scalar_ops.h                          |   0
 .../src/umath.cpp                             |   0
 .../src/umath.h                               |   0
 quaddtype/pyproject.toml                      |   4 +-
 quaddtype/quaddtype/__init__.py               |   1 -
 quaddtype/quaddtype/src/casts.cpp             | 468 -----------------
 quaddtype/quaddtype/src/dtype.c               | 216 --------
 quaddtype/quaddtype/src/scalar.c              | 126 -----
 quaddtype/reinstall.sh                        |   2 +-
 21 files changed, 969 insertions(+), 862 deletions(-)
 create mode 100644 quaddtype/numpy_quaddtype/__init__.py
 create mode 100644 quaddtype/numpy_quaddtype/src/casts.cpp
 rename quaddtype/{quaddtype => numpy_quaddtype}/src/casts.h (100%)
 create mode 100644 quaddtype/numpy_quaddtype/src/dtype.c
 rename quaddtype/{quaddtype => numpy_quaddtype}/src/dtype.h (53%)
 rename quaddtype/{quaddtype => numpy_quaddtype}/src/ops.hpp (100%)
 create mode 100644 quaddtype/numpy_quaddtype/src/quad_common.h
 rename quaddtype/{quaddtype => numpy_quaddtype}/src/quaddtype_main.c (68%)
 create mode 100644 quaddtype/numpy_quaddtype/src/scalar.c
 rename quaddtype/{quaddtype => numpy_quaddtype}/src/scalar.h (55%)
 rename quaddtype/{quaddtype => numpy_quaddtype}/src/scalar_ops.cpp (100%)
 rename quaddtype/{quaddtype => numpy_quaddtype}/src/scalar_ops.h (100%)
 rename quaddtype/{quaddtype => numpy_quaddtype}/src/umath.cpp (100%)
 rename quaddtype/{quaddtype => numpy_quaddtype}/src/umath.h (100%)
 delete mode 100644 quaddtype/quaddtype/__init__.py
 delete mode 100644 quaddtype/quaddtype/src/casts.cpp
 delete mode 100644 quaddtype/quaddtype/src/dtype.c
 delete mode 100644 quaddtype/quaddtype/src/scalar.c

diff --git a/quaddtype/meson.build b/quaddtype/meson.build
index f1f9eab1..50dc67b1 100644
--- a/quaddtype/meson.build
+++ b/quaddtype/meson.build
@@ -1,12 +1,12 @@
-project('quaddtype', 'c', 'cpp', default_options : ['cpp_std=c++17', 'b_pie=true'])
+project('numpy_quaddtype', 'c', 'cpp', default_options : ['cpp_std=c++17', 'b_pie=true'])
 
 py_mod = import('python')
 py = py_mod.find_installation()
 
 c = meson.get_compiler('c')
 
-sleef_dep = c.find_library('sleef')
-sleefquad_dep = c.find_library('sleefquad')
+sleef_dep = c.find_library('sleef', dirs:['/usr/local/lib'])
+sleefquad_dep = c.find_library('sleefquad', dirs:['/usr/local/lib'])
 
 incdir_numpy = run_command(py,
   [
@@ -19,30 +19,30 @@ incdir_numpy = run_command(py,
 includes = include_directories(
     [
         incdir_numpy,
-        'quaddtype/src',
+        'numpy_quaddtype/src',
     ]
 )
 
 srcs = [
-    'quaddtype/src/casts.h',
-    'quaddtype/src/casts.cpp',
-    'quaddtype/src/scalar.h',
-    'quaddtype/src/scalar.c',
-    'quaddtype/src/dtype.h',
-    'quaddtype/src/dtype.c',
-    'quaddtype/src/quaddtype_main.c',
-    'quaddtype/src/scalar_ops.h',
-    'quaddtype/src/scalar_ops.cpp',
-    'quaddtype/src/ops.hpp',
-    'quaddtype/src/umath.h',
-    'quaddtype/src/umath.cpp'
+    'numpy_quaddtype/src/casts.h',
+    'numpy_quaddtype/src/casts.cpp',
+    'numpy_quaddtype/src/scalar.h',
+    'numpy_quaddtype/src/scalar.c',
+    'numpy_quaddtype/src/dtype.h',
+    'numpy_quaddtype/src/dtype.c',
+    'numpy_quaddtype/src/quaddtype_main.c',
+    # 'numpy_quaddtype/src/scalar_ops.h',
+    # 'numpy_quaddtype/src/scalar_ops.cpp',
+    # 'numpy_quaddtype/src/ops.hpp',
+    # 'numpy_quaddtype/src/umath.h',
+    # 'numpy_quaddtype/src/umath.cpp'
 ]
 
 py.install_sources(
     [
-        'quaddtype/__init__.py',
+        'numpy_quaddtype/__init__.py',
     ],
-    subdir: 'quaddtype',
+    subdir: 'numpy_quaddtype',
     pure: false
 )
 
@@ -51,6 +51,6 @@ srcs,
 c_args: ['-g', '-O0', '-lsleef', '-lsleefquad'],
 dependencies: [sleef_dep, sleefquad_dep],
 install: true,
-subdir: 'quaddtype',
+subdir: 'numpy_quaddtype',
 include_directories: includes
 )
\ No newline at end of file
diff --git a/quaddtype/numpy_quaddtype/__init__.py b/quaddtype/numpy_quaddtype/__init__.py
new file mode 100644
index 00000000..5ee5ab30
--- /dev/null
+++ b/quaddtype/numpy_quaddtype/__init__.py
@@ -0,0 +1,23 @@
+from ._quaddtype_main import (
+    QuadPrecision,
+    QuadPrecDType
+)
+
+__all__ = ['QuadPrecision', 'QuadPrecDType', 'SleefQuadPrecision', 'LongDoubleQuadPrecision',
+           'SleefQuadPrecDType', 'LongDoubleQuadPrecDType']
+
+
+def SleefQuadPrecision(value):  # QuadPrecision scalar built with backend='sleef'
+    return QuadPrecision(value, backend='sleef')
+
+
+def LongDoubleQuadPrecision(value):  # QuadPrecision scalar built with backend='longdouble'
+    return QuadPrecision(value, backend='longdouble')
+
+
+def SleefQuadPrecDType():  # QuadPrecDType instance built with backend='sleef'
+    return QuadPrecDType(backend='sleef')
+
+
+def LongDoubleQuadPrecDType():  # QuadPrecDType instance built with backend='longdouble'
+    return QuadPrecDType(backend='longdouble')
diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp
new file mode 100644
index 00000000..44c316e0
--- /dev/null
+++ b/quaddtype/numpy_quaddtype/src/casts.cpp
@@ -0,0 +1,477 @@
+#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API
+#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API
+#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION
+#define NPY_TARGET_VERSION NPY_2_0_API_VERSION
+#define NO_IMPORT_ARRAY
+#define NO_IMPORT_UFUNC
+
+extern "C" {
+#include <Python.h>
+
+#include "numpy/arrayobject.h"
+#include "numpy/ndarraytypes.h"
+#include "numpy/dtype_api.h"
+}
+#include "sleef.h"
+#include "sleefquad.h"
+
+#include "quad_common.h"
+#include "scalar.h"
+#include "casts.h"
+#include "dtype.h"
+
+#define NUM_CASTS 29  // 14 to_casts + 14 from_casts + 1 quad_to_quad
+
+static NPY_CASTING
+quad_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self),
+                                 PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]),
+                                 QuadPrecDTypeObject *given_descrs[2],
+                                 QuadPrecDTypeObject *loop_descrs[2], npy_intp *view_offset)
+{
+    // A NULL output descriptor means no specific instance was requested: reuse the input.
+    QuadPrecDTypeObject *out_descr = given_descrs[1] ? given_descrs[1] : given_descrs[0];
+
+    // The strided loop copies raw bytes and cannot convert between backends, so a
+    // mismatch is reported as an error instead of returning with loop_descrs unset.
+    if (given_descrs[0]->backend != out_descr->backend) {
+        PyErr_SetString(PyExc_TypeError,
+                        "Cannot cast between QuadPrecDType instances with different backends");
+        return (NPY_CASTING)-1;
+    }
+
+    Py_INCREF(given_descrs[0]);
+    loop_descrs[0] = given_descrs[0];
+    Py_INCREF(out_descr);
+    loop_descrs[1] = out_descr;
+
+    *view_offset = 0;
+    return NPY_NO_CASTING;
+}
+
+static int
+quad_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[],
+                          npy_intp const dimensions[], npy_intp const strides[],
+                          void *NPY_UNUSED(auxdata))
+{
+    npy_intp N = dimensions[0];  // number of elements to copy
+    char *in_ptr = data[0];
+    char *out_ptr = data[1];
+
+    npy_intp in_stride = strides[0];  // byte step applied to in_ptr per element
+    npy_intp out_stride = strides[1];  // byte step applied to out_ptr per element
+
+    QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];  // input descr
+    size_t elem_size = (descr->backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
+
+    while (N--) {
+        memcpy(out_ptr, in_ptr, elem_size);  // plain byte copy; no value conversion happens
+        in_ptr += in_stride;
+        out_ptr += out_stride;
+    }
+    return 0;
+}
+
+// Casting from other types to QuadDType
+
+// template <typename T>
+// static inline Sleef_quad
+// to_quad(T x);
+
+// template <>
+// inline Sleef_quad
+// to_quad<npy_bool>(npy_bool x)
+// {
+//     return x ? Sleef_cast_from_doubleq1(1.0) : Sleef_cast_from_doubleq1(0.0);
+// }
+// template <>
+// inline Sleef_quad
+// to_quad<npy_byte>(npy_byte x)
+// {
+//     return Sleef_cast_from_int64q1(x);
+// }
+// // template <>
+// // inline Sleef_quad
+// // to_quad<npy_ubyte>(npy_ubyte x)
+// // {
+// //     return Sleef_cast_from_uint64q1(x);
+// // }
+// template <>
+// inline Sleef_quad
+// to_quad<npy_short>(npy_short x)
+// {
+//     return Sleef_cast_from_int64q1(x);
+// }
+// template <>
+// inline Sleef_quad
+// to_quad<npy_ushort>(npy_ushort x)
+// {
+//     return Sleef_cast_from_uint64q1(x);
+// }
+// template <>
+// inline Sleef_quad
+// to_quad<npy_int>(npy_int x)
+// {
+//     return Sleef_cast_from_int64q1(x);
+// }
+// template <>
+// inline Sleef_quad
+// to_quad<npy_uint>(npy_uint x)
+// {
+//     return Sleef_cast_from_uint64q1(x);
+// }
+// template <>
+// inline Sleef_quad
+// to_quad<npy_long>(npy_long x)
+// {
+//     return Sleef_cast_from_int64q1(x);
+// }
+// template <>
+// inline Sleef_quad
+// to_quad<npy_ulong>(npy_ulong x)
+// {
+//     return Sleef_cast_from_uint64q1(x);
+// }
+// template <>
+// inline Sleef_quad
+// to_quad<npy_longlong>(npy_longlong x)
+// {
+//     return Sleef_cast_from_int64q1(x);
+// }
+// template <>
+// inline Sleef_quad
+// to_quad<npy_ulonglong>(npy_ulonglong x)
+// {
+//     return Sleef_cast_from_uint64q1(x);
+// }
+// template <>
+// inline Sleef_quad
+// to_quad<float>(float x)
+// {
+//     return Sleef_cast_from_doubleq1(x);
+// }
+// template <>
+// inline Sleef_quad
+// to_quad<double>(double x)
+// {
+//     return Sleef_cast_from_doubleq1(x);
+// }
+// template <>
+// inline Sleef_quad
+// to_quad<long double>(long double x)
+// {
+//     return Sleef_cast_from_doubleq1(x);
+// }
+
+// template <typename T>
+// static NPY_CASTING
+// numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2],
+//                                   PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2],
+//                                   npy_intp *view_offset)
+// {
+//     if (given_descrs[1] == NULL) {
+//         loop_descrs[1] = (PyArray_Descr *)new_quaddtype_instance();
+//         if (loop_descrs[1] == nullptr) {
+//             return (NPY_CASTING)-1;
+//         }
+//     }
+//     else {
+//         Py_INCREF(given_descrs[1]);
+//         loop_descrs[1] = given_descrs[1];
+//     }
+
+//     loop_descrs[0] = PyArray_GetDefaultDescr(dtypes[0]);
+//     // *view_offset = 0;
+//     return NPY_SAFE_CASTING;
+// }
+
+// template <typename T>
+// static int
+// numpy_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[],
+//                            npy_intp const dimensions[], npy_intp const strides[],
+//                            void *NPY_UNUSED(auxdata))
+// {
+//     npy_intp N = dimensions[0];
+//     char *in_ptr = data[0];
+//     char *out_ptr = data[1];
+
+//     while (N--) {
+//         T in_val;
+//         Sleef_quad out_val;
+
+//         memcpy(&in_val, in_ptr, sizeof(T));
+//         out_val = to_quad<T>(in_val);
+//         memcpy(out_ptr, &out_val, sizeof(Sleef_quad));
+
+//         in_ptr += strides[0];
+//         out_ptr += strides[1];
+//     }
+//     return 0;
+// }
+
+// // Casting from QuadDType to other types
+
+// template <typename T>
+// static inline T
+// from_quad(Sleef_quad x);
+
+// template <>
+// inline npy_bool
+// from_quad<npy_bool>(Sleef_quad x)
+// {
+//     return Sleef_cast_to_int64q1(x) != 0;
+// }
+// template <>
+// inline npy_byte
+// from_quad<npy_byte>(Sleef_quad x)
+// {
+//     return (npy_byte)Sleef_cast_to_int64q1(x);
+// }
+// // template <>
+// // inline npy_ubyte
+// // from_quad<npy_ubyte>(Sleef_quad x)
+// // {
+// //     return (npy_ubyte)Sleef_cast_to_uint64q1(x);
+// // }
+// template <>
+// inline npy_short
+// from_quad<npy_short>(Sleef_quad x)
+// {
+//     return (npy_short)Sleef_cast_to_int64q1(x);
+// }
+// template <>
+// inline npy_ushort
+// from_quad<npy_ushort>(Sleef_quad x)
+// {
+//     return (npy_ushort)Sleef_cast_to_uint64q1(x);
+// }
+// template <>
+// inline npy_int
+// from_quad<npy_int>(Sleef_quad x)
+// {
+//     return (npy_int)Sleef_cast_to_int64q1(x);
+// }
+// template <>
+// inline npy_uint
+// from_quad<npy_uint>(Sleef_quad x)
+// {
+//     return (npy_uint)Sleef_cast_to_uint64q1(x);
+// }
+// template <>
+// inline npy_long
+// from_quad<npy_long>(Sleef_quad x)
+// {
+//     return (npy_long)Sleef_cast_to_int64q1(x);
+// }
+// template <>
+// inline npy_ulong
+// from_quad<npy_ulong>(Sleef_quad x)
+// {
+//     return (npy_ulong)Sleef_cast_to_uint64q1(x);
+// }
+// template <>
+// inline npy_longlong
+// from_quad<npy_longlong>(Sleef_quad x)
+// {
+//     return Sleef_cast_to_int64q1(x);
+// }
+// template <>
+// inline npy_ulonglong
+// from_quad<npy_ulonglong>(Sleef_quad x)
+// {
+//     return Sleef_cast_to_uint64q1(x);
+// }
+// template <>
+// inline float
+// from_quad<float>(Sleef_quad x)
+// {
+//     return Sleef_cast_to_doubleq1(x);
+// }
+// template <>
+// inline double
+// from_quad<double>(Sleef_quad x)
+// {
+//     return Sleef_cast_to_doubleq1(x);
+// }
+// template <>
+// inline long double
+// from_quad<long double>(Sleef_quad x)
+// {
+//     return Sleef_cast_to_doubleq1(x);
+// }
+
+// template <typename T>
+// static NPY_CASTING
+// quad_to_numpy_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2],
+//                                   PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2],
+//                                   npy_intp *view_offset)
+// {
+//     Py_INCREF(given_descrs[0]);
+//     loop_descrs[0] = given_descrs[0];
+
+//     loop_descrs[1] = PyArray_GetDefaultDescr(dtypes[1]);
+//     // *view_offset = 0;
+//     return NPY_UNSAFE_CASTING;
+// }
+
+// template <typename T>
+// static int
+// quad_to_numpy_strided_loop(PyArrayMethod_Context *context, char *const data[],
+//                            npy_intp const dimensions[], npy_intp const strides[],
+//                            void *NPY_UNUSED(auxdata))
+// {
+//     npy_intp N = dimensions[0];
+//     char *in_ptr = data[0];
+//     char *out_ptr = data[1];
+
+//     while (N--) {
+//         Sleef_quad in_val = *(Sleef_quad *)in_ptr;
+//         T *out_val = (T *)out_ptr;
+//         *out_val = from_quad<T>(in_val);
+
+//         in_ptr += strides[0];
+//         out_ptr += strides[1];
+//     }
+//     return 0;
+// }
+
+static PyArrayMethod_Spec *specs[NUM_CASTS + 1];  // +1 for NULL terminator
+static size_t spec_count = 0;
+
+void
+add_spec(PyArrayMethod_Spec *spec)
+{
+    if (spec_count < NUM_CASTS) {  // when the table is full the spec is silently dropped
+        specs[spec_count++] = spec;
+    }
+}
+
+// // functions to add casts
+// template <typename T>
+// void
+// add_cast_from(PyArray_DTypeMeta *to)
+// {
+//     PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{nullptr, to};
+
+//     PyType_Slot *slots = new PyType_Slot[3]{
+//             {NPY_METH_resolve_descriptors, (void *)&quad_to_numpy_resolve_descriptors<T>},
+//             {NPY_METH_strided_loop, (void *)&quad_to_numpy_strided_loop<T>},
+//             {0, nullptr}};
+
+//     PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{
+//             .name = "cast_QuadPrec_to_NumPy",
+//             .nin = 1,
+//             .nout = 1,
+//             .casting = NPY_UNSAFE_CASTING,
+//             .flags = (NPY_ARRAYMETHOD_FLAGS)0,
+//             .dtypes = dtypes,
+//             .slots = slots,
+//     };
+//     add_spec(spec);
+// }
+
+// template <typename T>
+// void
+// add_cast_to(PyArray_DTypeMeta *from)
+// {
+//     PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{from, nullptr};
+
+//     PyType_Slot *slots = new PyType_Slot[3]{
+//             {NPY_METH_resolve_descriptors, (void *)&numpy_to_quad_resolve_descriptors<T>},
+//             {NPY_METH_strided_loop, (void *)&numpy_to_quad_strided_loop<T>},
+//             {0, nullptr}};
+
+//     PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{
+//             .name = "cast_NumPy_to_QuadPrec",
+//             .nin = 1,
+//             .nout = 1,
+//             .casting = NPY_SAFE_CASTING,
+//             .flags = (NPY_ARRAYMETHOD_FLAGS)0,
+//             .dtypes = dtypes,
+//             .slots = slots,
+//     };
+
+//     add_spec(spec);
+// }
+
+PyArrayMethod_Spec **
+init_casts_internal(void)
+{
+    PyArray_DTypeMeta **quad2quad_dtypes =
+            new PyArray_DTypeMeta *[2]{&QuadPrecDType, &QuadPrecDType};
+    PyType_Slot *quad2quad_slots = new PyType_Slot[4]{
+            {NPY_METH_resolve_descriptors, (void *)&quad_to_quad_resolve_descriptors},
+            {NPY_METH_strided_loop, (void *)&quad_to_quad_strided_loop},
+            {NPY_METH_unaligned_strided_loop, (void *)&quad_to_quad_strided_loop},  // same loop
+            {0, nullptr}};
+
+    PyArrayMethod_Spec *quad2quad_spec = new PyArrayMethod_Spec{
+            .name = "cast_QuadPrec_to_QuadPrec",
+            .nin = 1,
+            .nout = 1,
+            .casting = NPY_NO_CASTING,
+            .flags = NPY_METH_SUPPORTS_UNALIGNED,
+            .dtypes = quad2quad_dtypes,
+            .slots = quad2quad_slots,
+    };
+
+    add_spec(quad2quad_spec);  // stored in the static specs table; free_casts() deletes it
+
+    // add_cast_to<npy_bool>(&PyArray_BoolDType);
+    // add_cast_to<npy_byte>(&PyArray_ByteDType);
+    // add_cast_to<npy_ubyte>(&PyArray_UByteDType);
+    // add_cast_to<npy_short>(&PyArray_ShortDType);
+    // add_cast_to<npy_ushort>(&PyArray_UShortDType);
+    // add_cast_to<npy_int>(&PyArray_IntDType);
+    // add_cast_to<npy_uint>(&PyArray_UIntDType);
+    // add_cast_to<npy_long>(&PyArray_LongDType);
+    // add_cast_to<npy_ulong>(&PyArray_ULongDType);
+    // add_cast_to<npy_longlong>(&PyArray_LongLongDType);
+    // add_cast_to<npy_ulonglong>(&PyArray_ULongLongDType);
+    // add_cast_to<float>(&PyArray_FloatDType);
+    // add_cast_to<double>(&PyArray_DoubleDType);
+    // add_cast_to<long double>(&PyArray_LongDoubleDType);
+
+    // add_cast_from<npy_bool>(&PyArray_BoolDType);
+    // add_cast_from<npy_byte>(&PyArray_ByteDType);
+    // add_cast_from<npy_ubyte>(&PyArray_UByteDType);
+    // add_cast_from<npy_short>(&PyArray_ShortDType);
+    // add_cast_from<npy_ushort>(&PyArray_UShortDType);
+    // add_cast_from<npy_int>(&PyArray_IntDType);
+    // add_cast_from<npy_uint>(&PyArray_UIntDType);
+    // add_cast_from<npy_long>(&PyArray_LongDType);
+    // add_cast_from<npy_ulong>(&PyArray_ULongDType);
+    // add_cast_from<npy_longlong>(&PyArray_LongLongDType);
+    // add_cast_from<npy_ulonglong>(&PyArray_ULongLongDType);
+    // add_cast_from<float>(&PyArray_FloatDType);
+    // add_cast_from<double>(&PyArray_DoubleDType);
+    // add_cast_from<long double>(&PyArray_LongDoubleDType);
+
+    specs[spec_count] = nullptr;  // terminate the table after the last registered spec
+    return specs;
+}
+
+PyArrayMethod_Spec **
+init_casts(void)
+{
+    try {
+        return init_casts_internal();
+    }
+    catch (...) {  // `new` throws std::bad_alloc, never int; nothing may unwind into C callers
+        PyErr_NoMemory();
+        return nullptr;
+    }
+}
+
+void
+free_casts(void)
+{
+    for (auto &cast : specs) {  // by reference so each slot can be cleared below
+        if (cast != nullptr) {
+            delete[] cast->dtypes;
+            delete[] cast->slots;
+            delete cast;
+        }
+        cast = nullptr;  // a stale pointer left here would be freed again by the next call
+    }
+    spec_count = 0;
+}
\ No newline at end of file
diff --git a/quaddtype/quaddtype/src/casts.h b/quaddtype/numpy_quaddtype/src/casts.h
similarity index 100%
rename from quaddtype/quaddtype/src/casts.h
rename to quaddtype/numpy_quaddtype/src/casts.h
diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c
new file mode 100644
index 00000000..a00c37c9
--- /dev/null
+++ b/quaddtype/numpy_quaddtype/src/dtype.c
@@ -0,0 +1,241 @@
+#include <Python.h>
+#include <sleef.h>
+#include <sleefquad.h>
+
+#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API
+#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API
+#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION
+#define NPY_TARGET_VERSION NPY_2_0_API_VERSION
+#define NO_IMPORT_ARRAY
+#define NO_IMPORT_UFUNC
+#include "numpy/arrayobject.h"
+#include "numpy/ndarraytypes.h"
+#include "numpy/dtype_api.h"
+
+#include "scalar.h"
+#include "casts.h"
+#include "dtype.h"
+
+// Copy one stored element from data_ptr into x; backend selects how many bytes form an
+// element. memcpy is used rather than a typed dereference so data_ptr need not be
+// aligned for the value type. Returns 0 on success, -1 if either pointer is NULL.
+static inline int
+quad_load(void *x, char *data_ptr, QuadBackendType backend)
+{
+    if (data_ptr == NULL || x == NULL) {
+        return -1;
+    }
+
+    size_t size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
+    memcpy(x, data_ptr, size);
+    return 0;
+}
+
+// Copy the value at x into the array element at data_ptr; backend selects how many bytes
+// form an element. memcpy is used rather than a typed dereference so data_ptr need not
+// be aligned for the value type. Returns 0 on success, -1 if either pointer is NULL.
+static inline int
+quad_store(char *data_ptr, void *x, QuadBackendType backend)
+{
+    if (data_ptr == NULL || x == NULL) {
+        return -1;
+    }
+
+    size_t size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
+    memcpy(data_ptr, x, size);
+    return 0;
+}
+
+QuadPrecDTypeObject *
+new_quaddtype_instance(QuadBackendType backend)
+{
+    QuadPrecDTypeObject *new = (QuadPrecDTypeObject *)PyArrayDescr_Type.tp_new(
+            (PyTypeObject *)&QuadPrecDType, NULL, NULL);
+    if (new == NULL) {  // tp_new failed; the caller receives NULL
+        return NULL;
+    }
+    new->base.elsize = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
+    new->base.alignment = (backend == BACKEND_SLEEF) ? _Alignof(Sleef_quad) : _Alignof(long double);
+    new->backend = backend;  // kept on the descriptor so users can pick the matching value type
+
+    return new;
+}
+
+static QuadPrecDTypeObject *
+ensure_canonical(QuadPrecDTypeObject *self)
+{
+    Py_INCREF(self);  // the same instance is handed back with one extra reference
+    return self;
+}
+
+static QuadPrecDTypeObject *
+common_instance(QuadPrecDTypeObject *dtype1, QuadPrecDTypeObject *dtype2)
+{
+    if (dtype1->backend != dtype2->backend) {  // mixed backends have no common instance
+        PyErr_SetString(PyExc_TypeError,
+                        "Cannot combine QuadPrecDType instances with different backends");
+        return NULL;
+    }
+    Py_INCREF(dtype1);  // backends match, so the first instance is returned
+    return dtype1;
+}
+
+static PyArray_DTypeMeta *
+common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other)
+{
+    // Promote integer and floating-point types to QuadPrecDType
+    if (other->type_num >= 0 &&  // negative type numbers are never promoted here
+        (PyTypeNum_ISINTEGER(other->type_num) || PyTypeNum_ISFLOAT(other->type_num))) {
+        Py_INCREF(cls);
+        return cls;
+    }
+    // Don't promote complex types
+    if (PyTypeNum_ISCOMPLEX(other->type_num)) {  // same result as the fallback below
+        Py_INCREF(Py_NotImplemented);
+        return (PyArray_DTypeMeta *)Py_NotImplemented;
+    }
+
+    Py_INCREF(Py_NotImplemented);  // every other dtype: no common dtype is offered
+    return (PyArray_DTypeMeta *)Py_NotImplemented;
+}
+
+static PyArray_Descr *
+quadprec_discover_descriptor_from_pyobject(PyArray_DTypeMeta *NPY_UNUSED(cls), PyObject *obj)
+{
+    if (Py_TYPE(obj) != &QuadPrecision_Type) {  // exact type comparison, not a subclass check
+        PyErr_SetString(PyExc_TypeError, "Can only store QuadPrecision in a QuadPrecDType array.");
+        return NULL;
+    }
+    QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)obj;
+    return (PyArray_Descr *)new_quaddtype_instance(quad_obj->backend);  // scalar's backend
+}
+
+static int
+quadprec_setitem(QuadPrecDTypeObject *descr, PyObject *obj, char *dataptr)
+{
+    QuadPrecisionObject *value;
+    if (PyObject_TypeCheck(obj, &QuadPrecision_Type)) {
+        value = (QuadPrecisionObject *)obj;
+        if (value->backend != descr->backend) {
+            // value->value is laid out for the scalar's backend, not the descriptor's
+            PyErr_SetString(PyExc_TypeError, "QuadPrecision backend does not match the dtype");
+            return -1;
+        }
+        Py_INCREF(obj);
+    }
+    else if ((value = QuadPrecision_from_object(obj, descr->backend)) == NULL) {
+        return -1;
+    }
+    int status = quad_store(dataptr, &value->value, descr->backend);
+    Py_DECREF(value);  // the bytes are already copied; the scalar is no longer needed
+    if (status < 0) {
+        char error_msg[100];
+        snprintf(error_msg, sizeof(error_msg), "Invalid memory location %p", (void *)dataptr);
+        PyErr_SetString(PyExc_ValueError, error_msg);
+        return -1;
+    }
+    return 0;
+}
+
+static PyObject *
+quadprec_getitem(QuadPrecDTypeObject *descr, char *dataptr)
+{
+    QuadPrecisionObject *new = QuadPrecision_raw_new(descr->backend);  // fresh scalar to fill
+    if (!new) {
+        return NULL;
+    }
+    if (quad_load(&new->value, dataptr, descr->backend) < 0) {  // negative means a NULL pointer
+        Py_DECREF(new);
+        char error_msg[100];
+        snprintf(error_msg, sizeof(error_msg), "Invalid memory location %p", (void *)dataptr);
+        PyErr_SetString(PyExc_ValueError, error_msg);
+        return NULL;
+    }
+    return (PyObject *)new;
+}
+
+static PyArray_Descr *
+quadprec_default_descr(PyArray_DTypeMeta *NPY_UNUSED(cls))
+{
+    return (PyArray_Descr *)new_quaddtype_instance(BACKEND_SLEEF);  // SLEEF is the default
+}
+
+static PyType_Slot QuadPrecDType_Slots[] = {
+        {NPY_DT_ensure_canonical, &ensure_canonical},
+        {NPY_DT_common_instance, &common_instance},
+        {NPY_DT_common_dtype, &common_dtype},
+        {NPY_DT_discover_descr_from_pyobject, &quadprec_discover_descriptor_from_pyobject},
+        {NPY_DT_setitem, &quadprec_setitem},
+        {NPY_DT_getitem, &quadprec_getitem},
+        {NPY_DT_default_descr, &quadprec_default_descr},
+        {0, NULL}};
+
+static PyObject *
+QuadPrecDType_new(PyTypeObject *NPY_UNUSED(cls), PyObject *args, PyObject *kwds)
+{
+    static char *kwlist[] = {"backend", NULL};
+    const char *backend_str = "sleef";  // used when no backend argument is given
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s", kwlist, &backend_str)) {
+        return NULL;
+    }
+
+    QuadBackendType backend = BACKEND_SLEEF;
+    if (strcmp(backend_str, "longdouble") == 0) {
+        backend = BACKEND_LONGDOUBLE;
+    }
+    else if (strcmp(backend_str, "sleef") != 0) {  // neither of the two accepted names
+        PyErr_SetString(PyExc_ValueError, "Invalid backend. Use 'sleef' or 'longdouble'.");
+        return NULL;
+    }
+
+    return (PyObject *)new_quaddtype_instance(backend);
+}
+
+static PyObject *
+QuadPrecDType_repr(QuadPrecDTypeObject *self)
+{
+    const char *backend_str = (self->backend == BACKEND_SLEEF) ? "sleef" : "longdouble";
+    return PyUnicode_FromFormat("QuadPrecDType(backend='%s')", backend_str);  // constructor form
+}
+
+PyArray_DTypeMeta QuadPrecDType = {
+        {{
+                PyVarObject_HEAD_INIT(NULL, 0).tp_name = "QuadPrecDType.QuadPrecDType",
+                .tp_basicsize = sizeof(QuadPrecDTypeObject),
+                .tp_new = QuadPrecDType_new,
+                .tp_repr = (reprfunc)QuadPrecDType_repr,
+                .tp_str = (reprfunc)QuadPrecDType_repr,
+        }},
+};
+
+int
+init_quadprec_dtype(void)
+{
+    PyArrayMethod_Spec **casts = init_casts();
+    if (!casts)
+        return -1;
+
+    PyArrayDTypeMeta_Spec QuadPrecDType_DTypeSpec = {
+            .flags = NPY_DT_NUMERIC,
+            .casts = casts,
+            .typeobj = &QuadPrecision_Type,
+            .slots = QuadPrecDType_Slots,
+    };
+
+    ((PyObject *)&QuadPrecDType)->ob_type = &PyArrayDTypeMeta_Type;
+    ((PyTypeObject *)&QuadPrecDType)->tp_base = &PyArrayDescr_Type;
+
+    // Run both registration steps, stopping at the first failure.
+    int status = -1;
+    if (PyType_Ready((PyTypeObject *)&QuadPrecDType) >= 0 &&
+        PyArrayInitDTypeMeta_FromSpec(&QuadPrecDType, &QuadPrecDType_DTypeSpec) >= 0) {
+        status = 0;
+    }
+
+    // The cast specs are heap-allocated by init_casts(); release them on every
+    // path so a failed initialisation does not leak them.
+    free_casts();
+
+    return status;
+}
\ No newline at end of file
diff --git a/quaddtype/quaddtype/src/dtype.h b/quaddtype/numpy_quaddtype/src/dtype.h
similarity index 53%
rename from quaddtype/quaddtype/src/dtype.h
rename to quaddtype/numpy_quaddtype/src/dtype.h
index 162d5714..243018d3 100644
--- a/quaddtype/quaddtype/src/dtype.h
+++ b/quaddtype/numpy_quaddtype/src/dtype.h
@@ -1,25 +1,23 @@
 #ifndef _QUADDTYPE_DTYPE_H
 #define _QUADDTYPE_DTYPE_H
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include<Python.h>
-#include<sleef.h>
-#include<numpy/ndarraytypes.h>
-#include<numpy/dtype_api.h>
-
-#include "scalar.h"
+#include <Python.h>
+#include <numpy/ndarraytypes.h>
+#include <numpy/dtype_api.h>
+#include "quad_common.h"
 
-typedef struct
-{
+typedef struct {
     PyArray_Descr base;
-    
+    QuadBackendType backend;
 } QuadPrecDTypeObject;
 
 extern PyArray_DTypeMeta QuadPrecDType;
 
-QuadPrecDTypeObject * new_quaddtype_instance(void);
+QuadPrecDTypeObject * new_quaddtype_instance(QuadBackendType backend);
 
 int init_quadprec_dtype(void);
 
diff --git a/quaddtype/quaddtype/src/ops.hpp b/quaddtype/numpy_quaddtype/src/ops.hpp
similarity index 100%
rename from quaddtype/quaddtype/src/ops.hpp
rename to quaddtype/numpy_quaddtype/src/ops.hpp
diff --git a/quaddtype/numpy_quaddtype/src/quad_common.h b/quaddtype/numpy_quaddtype/src/quad_common.h
new file mode 100644
index 00000000..fcdf6f2b
--- /dev/null
+++ b/quaddtype/numpy_quaddtype/src/quad_common.h
@@ -0,0 +1,17 @@
+#ifndef _QUADDTYPE_COMMON_H
+#define _QUADDTYPE_COMMON_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum {
+    BACKEND_SLEEF = 0,
+    BACKEND_LONGDOUBLE
+} QuadBackendType;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
\ No newline at end of file
diff --git a/quaddtype/quaddtype/src/quaddtype_main.c b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
similarity index 68%
rename from quaddtype/quaddtype/src/quaddtype_main.c
rename to quaddtype/numpy_quaddtype/src/quaddtype_main.c
index 098c0749..ce263e49 100644
--- a/quaddtype/quaddtype/src/quaddtype_main.c
+++ b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
@@ -9,13 +9,15 @@
 #include "numpy/dtype_api.h"
 #include "numpy/ufuncobject.h"
 
+#include "scalar.h"
 #include "dtype.h"
-#include "umath.h"
+// #include "umath.h"
+#include "quad_common.h"
 
 static struct PyModuleDef moduledef = {
         PyModuleDef_HEAD_INIT,
         .m_name = "_quaddtype_main",
-        .m_doc = "Quad (128-bit) floating point Data Type for Numpy",
+        .m_doc = "Quad (128-bit) floating point Data Type for NumPy with multiple backends",
         .m_size = -1,
 };
 
@@ -25,31 +27,29 @@ PyInit__quaddtype_main(void)
     import_array();
     import_umath();
     PyObject *m = PyModule_Create(&moduledef);
-    if (!m) 
-    {
+    if (!m) {
         return NULL;
     }
 
     if (init_quadprecision_scalar() < 0)
         goto error;
-    
-    if(PyModule_AddObject(m, "QuadPrecision", (PyObject *)&QuadPrecision_Type) < 0)
-        goto error;
 
-    if(init_quadprec_dtype() < 0)
+    if (PyModule_AddObject(m, "QuadPrecision", (PyObject *)&QuadPrecision_Type) < 0)
         goto error;
 
-    if(PyModule_AddObject(m, "QuadPrecDType", (PyObject *)&QuadPrecDType) < 0)
+    if (init_quadprec_dtype() < 0)
         goto error;
 
-    if (init_quad_umath() < 0) {
+    if (PyModule_AddObject(m, "QuadPrecDType", (PyObject *)&QuadPrecDType) < 0)
         goto error;
-    }
+
+    // if (init_quad_umath() < 0) {
+    //     goto error;
+    // }
 
     return m;
-    
 
 error:
-    Py_DECREF(m);
+    Py_XDECREF(m);
     return NULL;
 }
\ No newline at end of file
diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c
new file mode 100644
index 00000000..a3101286
--- /dev/null
+++ b/quaddtype/numpy_quaddtype/src/scalar.c
@@ -0,0 +1,159 @@
+#include <Python.h>
+#include <sleef.h>
+#include <sleefquad.h>
+#include <stdlib.h>
+
+#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API
+#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION
+#define NO_IMPORT_ARRAY
+
+#include "numpy/arrayobject.h"
+#include "numpy/ndarraytypes.h"
+#include "numpy/dtype_api.h"
+
+#include "scalar.h"
+// #include "scalar_ops.h"
+
+QuadPrecisionObject *
+QuadPrecision_raw_new(QuadBackendType backend)
+{
+    QuadPrecisionObject *new = PyObject_New(QuadPrecisionObject, &QuadPrecision_Type);
+    if (!new)
+        return NULL;
+    new->backend = backend;
+    if (backend == BACKEND_SLEEF) {
+        new->value.sleef_value = Sleef_cast_from_doubleq1(0.0);
+    }
+    else {
+        new->value.longdouble_value = 0.0L;
+    }
+    return new;
+}
+
+QuadPrecisionObject *
+QuadPrecision_from_object(PyObject *value, QuadBackendType backend)
+{
+    QuadPrecisionObject *self = QuadPrecision_raw_new(backend);
+    if (!self)
+        return NULL;
+    // Accepts a Python float, exact str, or int; on failure releases self and returns NULL.
+    if (PyFloat_Check(value)) {
+        double dval = PyFloat_AsDouble(value);
+        if (backend == BACKEND_SLEEF) {
+            self->value.sleef_value = Sleef_cast_from_doubleq1(dval);
+        }
+        else {
+            self->value.longdouble_value = (long double)dval;
+        }
+    }
+    else if (PyUnicode_CheckExact(value)) {
+        const char *s = PyUnicode_AsUTF8(value);
+        char *endptr = NULL;
+        if (s != NULL && backend == BACKEND_SLEEF) {
+            self->value.sleef_value = Sleef_strtoq(s, &endptr);
+        }
+        else if (s != NULL) {
+            self->value.longdouble_value = strtold(s, &endptr);
+        }
+        if (s == NULL || *endptr != '\0' || endptr == s) {  // NULL s: UTF-8 encoding failed
+            PyErr_SetString(PyExc_ValueError, "Unable to parse string to QuadPrecision");
+            Py_DECREF(self);
+            return NULL;
+        }
+    }
+    else if (PyLong_Check(value)) {
+        long long val = PyLong_AsLongLong(value);
+        if (val == -1 && PyErr_Occurred()) {
+            PyErr_SetString(PyExc_OverflowError, "Overflow Error, value out of range");
+            Py_DECREF(self);
+            return NULL;
+        }
+        if (backend == BACKEND_SLEEF) {
+            self->value.sleef_value = Sleef_cast_from_int64q1(val);
+        }
+        else {
+            self->value.longdouble_value = (long double)val;
+        }
+    }
+    else {
+        PyErr_SetString(PyExc_TypeError, "QuadPrecision value must be a float, int or string");
+        Py_DECREF(self);
+        return NULL;
+    }
+
+    return self;
+}
+
+static PyObject *
+QuadPrecision_new(PyTypeObject *cls, PyObject *args, PyObject *kwargs)
+{
+    PyObject *value;
+    const char *backend_str = "sleef";
+    static char *kwlist[] = {"value", "backend", NULL};
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s", kwlist, &value, &backend_str)) {
+        return NULL;
+    }
+
+    QuadBackendType backend = BACKEND_SLEEF;
+    if (strcmp(backend_str, "longdouble") == 0) {
+        backend = BACKEND_LONGDOUBLE;
+    }
+    else if (strcmp(backend_str, "sleef") != 0) {
+        PyErr_SetString(PyExc_ValueError, "Invalid backend. Use 'sleef' or 'longdouble'.");
+        return NULL;
+    }
+
+    return (PyObject *)QuadPrecision_from_object(value, backend);
+}
+
+static PyObject *
+QuadPrecision_str(QuadPrecisionObject *self)
+{
+    char buffer[128];
+    if (self->backend == BACKEND_SLEEF) {
+        Sleef_snprintf(buffer, sizeof(buffer), "%.*Qe", SLEEF_QUAD_DIG, self->value.sleef_value);
+    }
+    else {
+        snprintf(buffer, sizeof(buffer), "%.35Le", self->value.longdouble_value);
+    }
+    return PyUnicode_FromString(buffer);
+}
+
+static PyObject *
+QuadPrecision_repr(QuadPrecisionObject *self)
+{
+    PyObject *str = QuadPrecision_str(self);
+    if (str == NULL) {
+        return NULL;
+    }
+    const char *backend_str = (self->backend == BACKEND_SLEEF) ? "sleef" : "longdouble";
+    PyObject *res = PyUnicode_FromFormat("QuadPrecision('%S', backend='%s')", str, backend_str);
+    Py_DECREF(str);
+    return res;
+}
+
+static void
+QuadPrecision_dealloc(QuadPrecisionObject *self)
+{
+    Py_TYPE(self)->tp_free((PyObject *)self);
+}
+
+PyTypeObject QuadPrecision_Type = {
+        PyVarObject_HEAD_INIT(NULL, 0).tp_name = "numpy_quaddtype.QuadPrecision",
+        .tp_basicsize = sizeof(QuadPrecisionObject),
+        .tp_itemsize = 0,
+        .tp_new = QuadPrecision_new,
+        .tp_dealloc = (destructor)QuadPrecision_dealloc,
+        .tp_repr = (reprfunc)QuadPrecision_repr,
+        .tp_str = (reprfunc)QuadPrecision_str,
+        // .tp_as_number = &quad_as_scalar,
+        // .tp_richcompare = (richcmpfunc)quad_richcompare
+
+};
+
+int
+init_quadprecision_scalar(void)
+{
+    return PyType_Ready(&QuadPrecision_Type);
+}
\ No newline at end of file
diff --git a/quaddtype/quaddtype/src/scalar.h b/quaddtype/numpy_quaddtype/src/scalar.h
similarity index 55%
rename from quaddtype/quaddtype/src/scalar.h
rename to quaddtype/numpy_quaddtype/src/scalar.h
index 962a1fec..344fef09 100644
--- a/quaddtype/quaddtype/src/scalar.h
+++ b/quaddtype/numpy_quaddtype/src/scalar.h
@@ -7,23 +7,26 @@ extern "C" {
 
 #include <Python.h>
 #include <sleef.h>
+#include "quad_common.h"
 
-typedef struct {
-    Sleef_quad value;
-} quad_field;
+typedef union {
+    Sleef_quad sleef_value;
+    long double longdouble_value;
+} quad_value;
 
 typedef struct {
     PyObject_HEAD
-    quad_field quad;
+    quad_value value;
+    QuadBackendType backend;
 } QuadPrecisionObject;
 
 extern PyTypeObject QuadPrecision_Type;
 
 QuadPrecisionObject *
-QuadPrecision_raw_new(void);
+QuadPrecision_raw_new(QuadBackendType backend);
 
 QuadPrecisionObject *
-QuadPrecision_from_object(PyObject *value);
+QuadPrecision_from_object(PyObject *value, QuadBackendType backend);
 
 int
 init_quadprecision_scalar(void);
diff --git a/quaddtype/quaddtype/src/scalar_ops.cpp b/quaddtype/numpy_quaddtype/src/scalar_ops.cpp
similarity index 100%
rename from quaddtype/quaddtype/src/scalar_ops.cpp
rename to quaddtype/numpy_quaddtype/src/scalar_ops.cpp
diff --git a/quaddtype/quaddtype/src/scalar_ops.h b/quaddtype/numpy_quaddtype/src/scalar_ops.h
similarity index 100%
rename from quaddtype/quaddtype/src/scalar_ops.h
rename to quaddtype/numpy_quaddtype/src/scalar_ops.h
diff --git a/quaddtype/quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp
similarity index 100%
rename from quaddtype/quaddtype/src/umath.cpp
rename to quaddtype/numpy_quaddtype/src/umath.cpp
diff --git a/quaddtype/quaddtype/src/umath.h b/quaddtype/numpy_quaddtype/src/umath.h
similarity index 100%
rename from quaddtype/quaddtype/src/umath.h
rename to quaddtype/numpy_quaddtype/src/umath.h
diff --git a/quaddtype/pyproject.toml b/quaddtype/pyproject.toml
index 68b61e7f..836b73f2 100644
--- a/quaddtype/pyproject.toml
+++ b/quaddtype/pyproject.toml
@@ -9,7 +9,7 @@ requires = [
 build-backend = "mesonpy"
 
 [project]
-name = "quaddtype"
+name = "numpy_quaddtype"
 description = "Quad (128-bit) float dtype for numpy"
 version = "0.0.1"
 readme = 'README.md'
@@ -22,4 +22,4 @@ dependencies = [
 [project.optional-dependencies]
 test = [
     "pytest",
-]
+]
\ No newline at end of file
diff --git a/quaddtype/quaddtype/__init__.py b/quaddtype/quaddtype/__init__.py
deleted file mode 100644
index 9f246f73..00000000
--- a/quaddtype/quaddtype/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from ._quaddtype_main import QuadPrecDType, QuadPrecision
\ No newline at end of file
diff --git a/quaddtype/quaddtype/src/casts.cpp b/quaddtype/quaddtype/src/casts.cpp
deleted file mode 100644
index c096fe5c..00000000
--- a/quaddtype/quaddtype/src/casts.cpp
+++ /dev/null
@@ -1,468 +0,0 @@
-#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API
-#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API
-#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION
-#define NPY_TARGET_VERSION NPY_2_0_API_VERSION
-#define NO_IMPORT_ARRAY
-#define NO_IMPORT_UFUNC
-
-extern "C" {
-#include <Python.h>
-
-#include "numpy/arrayobject.h"
-#include "numpy/ndarraytypes.h"
-#include "numpy/dtype_api.h"
-}
-#include "sleef.h"
-#include "sleefquad.h"
-
-#include "scalar.h"
-#include "casts.h"
-#include "dtype.h"
-
-#define NUM_CASTS 29  // 14 to_casts + 14 from_casts + 1 quad_to_quad
-
-static NPY_CASTING
-quad_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self),
-                                 PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]),
-                                 QuadPrecDTypeObject *given_descrs[2],
-                                 QuadPrecDTypeObject *loop_descrs[2], npy_intp *view_offset)
-{
-    Py_INCREF(given_descrs[0]);
-    loop_descrs[0] = given_descrs[0];
-
-    if (given_descrs[1] == NULL) {
-        Py_INCREF(given_descrs[0]);
-        loop_descrs[1] = given_descrs[0];
-    }
-    else {
-        Py_INCREF(given_descrs[1]);
-        loop_descrs[1] = given_descrs[1];
-    }
-
-    *view_offset = 0;
-    return NPY_NO_CASTING;
-}
-
-static int
-quad_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[],
-                          npy_intp const dimensions[], npy_intp const strides[],
-                          void *NPY_UNUSED(auxdata))
-{
-    npy_intp N = dimensions[0];
-    char *in_ptr = data[0];
-    char *out_ptr = data[1];
-
-    npy_intp in_stride = strides[0];
-    npy_intp out_stride = strides[1];
-
-    while (N--) {
-        memcpy(out_ptr, in_ptr, sizeof(Sleef_quad));
-        in_ptr += in_stride;
-        out_ptr += out_stride;
-    }
-    return 0;
-}
-
-// Casting from other types to QuadDType
-
-template <typename T>
-static inline Sleef_quad
-to_quad(T x);
-
-template <>
-inline Sleef_quad
-to_quad<npy_bool>(npy_bool x)
-{
-    return x ? Sleef_cast_from_doubleq1(1.0) : Sleef_cast_from_doubleq1(0.0);
-}
-template <>
-inline Sleef_quad
-to_quad<npy_byte>(npy_byte x)
-{
-    return Sleef_cast_from_int64q1(x);
-}
-// template <>
-// inline Sleef_quad
-// to_quad<npy_ubyte>(npy_ubyte x)
-// {
-//     return Sleef_cast_from_uint64q1(x);
-// }
-template <>
-inline Sleef_quad
-to_quad<npy_short>(npy_short x)
-{
-    return Sleef_cast_from_int64q1(x);
-}
-template <>
-inline Sleef_quad
-to_quad<npy_ushort>(npy_ushort x)
-{
-    return Sleef_cast_from_uint64q1(x);
-}
-template <>
-inline Sleef_quad
-to_quad<npy_int>(npy_int x)
-{
-    return Sleef_cast_from_int64q1(x);
-}
-template <>
-inline Sleef_quad
-to_quad<npy_uint>(npy_uint x)
-{
-    return Sleef_cast_from_uint64q1(x);
-}
-template <>
-inline Sleef_quad
-to_quad<npy_long>(npy_long x)
-{
-    return Sleef_cast_from_int64q1(x);
-}
-template <>
-inline Sleef_quad
-to_quad<npy_ulong>(npy_ulong x)
-{
-    return Sleef_cast_from_uint64q1(x);
-}
-template <>
-inline Sleef_quad
-to_quad<npy_longlong>(npy_longlong x)
-{
-    return Sleef_cast_from_int64q1(x);
-}
-template <>
-inline Sleef_quad
-to_quad<npy_ulonglong>(npy_ulonglong x)
-{
-    return Sleef_cast_from_uint64q1(x);
-}
-template <>
-inline Sleef_quad
-to_quad<float>(float x)
-{
-    return Sleef_cast_from_doubleq1(x);
-}
-template <>
-inline Sleef_quad
-to_quad<double>(double x)
-{
-    return Sleef_cast_from_doubleq1(x);
-}
-template <>
-inline Sleef_quad
-to_quad<long double>(long double x)
-{
-    return Sleef_cast_from_doubleq1(x);
-}
-
-template <typename T>
-static NPY_CASTING
-numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2],
-                                  PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2],
-                                  npy_intp *view_offset)
-{
-    if (given_descrs[1] == NULL) {
-        loop_descrs[1] = (PyArray_Descr *)new_quaddtype_instance();
-        if (loop_descrs[1] == nullptr) {
-            return (NPY_CASTING)-1;
-        }
-    }
-    else {
-        Py_INCREF(given_descrs[1]);
-        loop_descrs[1] = given_descrs[1];
-    }
-
-    loop_descrs[0] = PyArray_GetDefaultDescr(dtypes[0]);
-    // *view_offset = 0;
-    return NPY_SAFE_CASTING;
-}
-
-template <typename T>
-static int
-numpy_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[],
-                           npy_intp const dimensions[], npy_intp const strides[],
-                           void *NPY_UNUSED(auxdata))
-{
-    npy_intp N = dimensions[0];
-    char *in_ptr = data[0];
-    char *out_ptr = data[1];
-
-    while (N--) {
-        T in_val;
-        Sleef_quad out_val;
-
-        memcpy(&in_val, in_ptr, sizeof(T));
-        out_val = to_quad<T>(in_val);
-        memcpy(out_ptr, &out_val, sizeof(Sleef_quad));
-
-        in_ptr += strides[0];
-        out_ptr += strides[1];
-    }
-    return 0;
-}
-
-// Casting from QuadDType to other types
-
-template <typename T>
-static inline T
-from_quad(Sleef_quad x);
-
-template <>
-inline npy_bool
-from_quad<npy_bool>(Sleef_quad x)
-{
-    return Sleef_cast_to_int64q1(x) != 0;
-}
-template <>
-inline npy_byte
-from_quad<npy_byte>(Sleef_quad x)
-{
-    return (npy_byte)Sleef_cast_to_int64q1(x);
-}
-// template <>
-// inline npy_ubyte
-// from_quad<npy_ubyte>(Sleef_quad x)
-// {
-//     return (npy_ubyte)Sleef_cast_to_uint64q1(x);
-// }
-template <>
-inline npy_short
-from_quad<npy_short>(Sleef_quad x)
-{
-    return (npy_short)Sleef_cast_to_int64q1(x);
-}
-template <>
-inline npy_ushort
-from_quad<npy_ushort>(Sleef_quad x)
-{
-    return (npy_ushort)Sleef_cast_to_uint64q1(x);
-}
-template <>
-inline npy_int
-from_quad<npy_int>(Sleef_quad x)
-{
-    return (npy_int)Sleef_cast_to_int64q1(x);
-}
-template <>
-inline npy_uint
-from_quad<npy_uint>(Sleef_quad x)
-{
-    return (npy_uint)Sleef_cast_to_uint64q1(x);
-}
-template <>
-inline npy_long
-from_quad<npy_long>(Sleef_quad x)
-{
-    return (npy_long)Sleef_cast_to_int64q1(x);
-}
-template <>
-inline npy_ulong
-from_quad<npy_ulong>(Sleef_quad x)
-{
-    return (npy_ulong)Sleef_cast_to_uint64q1(x);
-}
-template <>
-inline npy_longlong
-from_quad<npy_longlong>(Sleef_quad x)
-{
-    return Sleef_cast_to_int64q1(x);
-}
-template <>
-inline npy_ulonglong
-from_quad<npy_ulonglong>(Sleef_quad x)
-{
-    return Sleef_cast_to_uint64q1(x);
-}
-template <>
-inline float
-from_quad<float>(Sleef_quad x)
-{
-    return Sleef_cast_to_doubleq1(x);
-}
-template <>
-inline double
-from_quad<double>(Sleef_quad x)
-{
-    return Sleef_cast_to_doubleq1(x);
-}
-template <>
-inline long double
-from_quad<long double>(Sleef_quad x)
-{
-    return Sleef_cast_to_doubleq1(x);
-}
-
-template <typename T>
-static NPY_CASTING
-quad_to_numpy_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2],
-                                  PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2],
-                                  npy_intp *view_offset)
-{
-    Py_INCREF(given_descrs[0]);
-    loop_descrs[0] = given_descrs[0];
-
-    loop_descrs[1] = PyArray_GetDefaultDescr(dtypes[1]);
-    // *view_offset = 0;
-    return NPY_UNSAFE_CASTING;
-}
-
-template <typename T>
-static int
-quad_to_numpy_strided_loop(PyArrayMethod_Context *context, char *const data[],
-                           npy_intp const dimensions[], npy_intp const strides[],
-                           void *NPY_UNUSED(auxdata))
-{
-    npy_intp N = dimensions[0];
-    char *in_ptr = data[0];
-    char *out_ptr = data[1];
-
-    while (N--) {
-        Sleef_quad in_val = *(Sleef_quad *)in_ptr;
-        T *out_val = (T *)out_ptr;
-        *out_val = from_quad<T>(in_val);
-
-        in_ptr += strides[0];
-        out_ptr += strides[1];
-    }
-    return 0;
-}
-
-static PyArrayMethod_Spec *specs[NUM_CASTS + 1];  // +1 for NULL terminator
-static size_t spec_count = 0;
-
-void
-add_spec(PyArrayMethod_Spec *spec)
-{
-    if (spec_count < NUM_CASTS) {
-        specs[spec_count++] = spec;
-    }
-}
-
-// functions to add casts
-template <typename T>
-void
-add_cast_from(PyArray_DTypeMeta *to)
-{
-    PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{nullptr, to};
-
-    PyType_Slot *slots = new PyType_Slot[3]{
-            {NPY_METH_resolve_descriptors, (void *)&quad_to_numpy_resolve_descriptors<T>},
-            {NPY_METH_strided_loop, (void *)&quad_to_numpy_strided_loop<T>},
-            {0, nullptr}};
-
-    PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{
-            .name = "cast_QuadPrec_to_NumPy",
-            .nin = 1,
-            .nout = 1,
-            .casting = NPY_UNSAFE_CASTING,
-            .flags = (NPY_ARRAYMETHOD_FLAGS)0,
-            .dtypes = dtypes,
-            .slots = slots,
-    };
-    add_spec(spec);
-}
-
-template <typename T>
-void
-add_cast_to(PyArray_DTypeMeta *from)
-{
-    PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{from, nullptr};
-
-    PyType_Slot *slots = new PyType_Slot[3]{
-            {NPY_METH_resolve_descriptors, (void *)&numpy_to_quad_resolve_descriptors<T>},
-            {NPY_METH_strided_loop, (void *)&numpy_to_quad_strided_loop<T>},
-            {0, nullptr}};
-
-    PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{
-            .name = "cast_NumPy_to_QuadPrec",
-            .nin = 1,
-            .nout = 1,
-            .casting = NPY_SAFE_CASTING,
-            .flags = (NPY_ARRAYMETHOD_FLAGS)0,
-            .dtypes = dtypes,
-            .slots = slots,
-    };
-
-    add_spec(spec);
-}
-
-PyArrayMethod_Spec **
-init_casts_internal(void)
-{
-    PyArray_DTypeMeta **quad2quad_dtypes = new PyArray_DTypeMeta *[2]{nullptr, nullptr};
-    PyType_Slot *quad2quad_slots = new PyType_Slot[4]{
-            {NPY_METH_resolve_descriptors, (void *)&quad_to_quad_resolve_descriptors},
-            {NPY_METH_strided_loop, (void *)&quad_to_quad_strided_loop},
-            {NPY_METH_unaligned_strided_loop, (void *)&quad_to_quad_strided_loop},
-            {0, nullptr}};
-
-    PyArrayMethod_Spec *quad2quad_spec = new PyArrayMethod_Spec{
-            .name = "cast_QuadPrec_to_QuadPrec",
-            .nin = 1,
-            .nout = 1,
-            .casting = NPY_NO_CASTING,
-            .flags = NPY_METH_SUPPORTS_UNALIGNED,
-            .dtypes = quad2quad_dtypes,
-            .slots = quad2quad_slots,
-    };
-
-    add_spec(quad2quad_spec);
-
-    add_cast_to<npy_bool>(&PyArray_BoolDType);
-    add_cast_to<npy_byte>(&PyArray_ByteDType);
-    add_cast_to<npy_ubyte>(&PyArray_UByteDType);
-    add_cast_to<npy_short>(&PyArray_ShortDType);
-    add_cast_to<npy_ushort>(&PyArray_UShortDType);
-    add_cast_to<npy_int>(&PyArray_IntDType);
-    add_cast_to<npy_uint>(&PyArray_UIntDType);
-    add_cast_to<npy_long>(&PyArray_LongDType);
-    add_cast_to<npy_ulong>(&PyArray_ULongDType);
-    add_cast_to<npy_longlong>(&PyArray_LongLongDType);
-    add_cast_to<npy_ulonglong>(&PyArray_ULongLongDType);
-    add_cast_to<float>(&PyArray_FloatDType);
-    add_cast_to<double>(&PyArray_DoubleDType);
-    add_cast_to<long double>(&PyArray_LongDoubleDType);
-
-    add_cast_from<npy_bool>(&PyArray_BoolDType);
-    add_cast_from<npy_byte>(&PyArray_ByteDType);
-    add_cast_from<npy_ubyte>(&PyArray_UByteDType);
-    add_cast_from<npy_short>(&PyArray_ShortDType);
-    add_cast_from<npy_ushort>(&PyArray_UShortDType);
-    add_cast_from<npy_int>(&PyArray_IntDType);
-    add_cast_from<npy_uint>(&PyArray_UIntDType);
-    add_cast_from<npy_long>(&PyArray_LongDType);
-    add_cast_from<npy_ulong>(&PyArray_ULongDType);
-    add_cast_from<npy_longlong>(&PyArray_LongLongDType);
-    add_cast_from<npy_ulonglong>(&PyArray_ULongLongDType);
-    add_cast_from<float>(&PyArray_FloatDType);
-    add_cast_from<double>(&PyArray_DoubleDType);
-    add_cast_from<long double>(&PyArray_LongDoubleDType);
-
-    specs[spec_count] = nullptr;
-    return specs;
-}
-
-PyArrayMethod_Spec **
-init_casts(void)
-{
-    try {
-        return init_casts_internal();
-    }
-    catch (int e) {
-        PyErr_NoMemory();
-        return nullptr;
-    }
-}
-
-void
-free_casts(void)
-{
-    for (auto cast : specs) {
-        if (cast == nullptr) {
-            continue;
-        }
-        delete cast->dtypes;
-        delete cast->slots;
-        delete cast;
-    }
-    spec_count = 0;
-}
diff --git a/quaddtype/quaddtype/src/dtype.c b/quaddtype/quaddtype/src/dtype.c
deleted file mode 100644
index a6f485bf..00000000
--- a/quaddtype/quaddtype/src/dtype.c
+++ /dev/null
@@ -1,216 +0,0 @@
-#include <Python.h>
-#include <sleef.h>
-#include <sleefquad.h>
-
-#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API
-#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API
-#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION
-#define NPY_TARGET_VERSION NPY_2_0_API_VERSION
-#define NO_IMPORT_ARRAY
-#define NO_IMPORT_UFUNC
-#include "numpy/arrayobject.h"
-#include "numpy/ndarraytypes.h"
-#include "numpy/dtype_api.h"
-
-#include "scalar.h"
-#include "casts.h"
-#include "dtype.h"
-
-static inline int quad_load(Sleef_quad *x, char *data_ptr) 
-{
-    if (data_ptr == NULL || x == NULL) 
-    {
-        return -1;
-    }
-    *x = *(Sleef_quad *)data_ptr;
-    return 0;
-}
-
-static inline int quad_store(char *data_ptr, Sleef_quad x) 
-{
-    if (data_ptr == NULL) 
-    {
-        return -1;
-    }
-    *(Sleef_quad *)data_ptr = x;
-    return 0;
-}
-
-QuadPrecDTypeObject  * new_quaddtype_instance(void)
-{
-    QuadPrecDTypeObject *new = (QuadPrecDTypeObject *)PyArrayDescr_Type.tp_new((PyTypeObject *)&QuadPrecDType, NULL, NULL);
-    if (new == NULL) {
-        return NULL;
-    }
-    new->base.elsize = sizeof(Sleef_quad);
-    new->base.alignment = _Alignof(Sleef_quad);
-
-    return new;
-}
-
-static QuadPrecDTypeObject * ensure_canonical(QuadPrecDTypeObject *self)
-{
-    Py_INCREF(self);
-    return self;
-}
-
-static QuadPrecDTypeObject * common_instance(QuadPrecDTypeObject *dtype1, QuadPrecDTypeObject *dtype2)
-{
-    Py_INCREF(dtype1);
-    return dtype1;
-}
-
-
-static PyArray_DTypeMeta * common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other)
-{
-    // Promote integer and floating-point types to QuadPrecDType
-    if (other->type_num >= 0 && 
-        (PyTypeNum_ISINTEGER(other->type_num) || 
-         PyTypeNum_ISFLOAT(other->type_num))) {
-        Py_INCREF(cls);
-        return cls;
-    }
-    // Don't promote complex types
-    if (PyTypeNum_ISCOMPLEX(other->type_num)) {
-        Py_INCREF(Py_NotImplemented);
-        return (PyArray_DTypeMeta *)Py_NotImplemented;
-    }
-
-    Py_INCREF(Py_NotImplemented);
-    return (PyArray_DTypeMeta *)Py_NotImplemented;
-}
-
-static PyArray_Descr *
-quadprec_discover_descriptor_from_pyobject(PyArray_DTypeMeta *NPY_UNUSED(cls), PyObject *obj)
-{
-    if (Py_TYPE(obj) != &QuadPrecision_Type) 
-    {
-        PyErr_SetString(PyExc_TypeError, "Can only store QuadPrecision in a QuadPrecDType array.");
-        return NULL;
-    }
-    return (PyArray_Descr *)new_quaddtype_instance();
-}
-
-static int quadprec_setitem(QuadPrecDTypeObject *descr, PyObject *obj, char *dataptr)
-{
-    QuadPrecisionObject *value;
-    if (PyObject_TypeCheck(obj, &QuadPrecision_Type)) 
-    {
-        Py_INCREF(obj);
-        value = (QuadPrecisionObject *)obj;
-    }
-    else 
-    {
-        value = QuadPrecision_from_object(obj);
-        if (value == NULL) {
-            return -1;
-        }
-    }
-
-    if (quad_store(dataptr, value->quad.value) < 0)
-    {
-        Py_DECREF(value);
-        char error_msg[100];
-        snprintf(error_msg, sizeof(error_msg), "Invalid memory location %p", (void*)dataptr);
-        PyErr_SetString(PyExc_ValueError, error_msg);
-        return -1;
-    }
-
-    Py_DECREF(value);
-    return 0;
-}
-
-static PyObject * quadprec_getitem(QuadPrecDTypeObject *descr, char *dataptr)
-{
-    QuadPrecisionObject *new = QuadPrecision_raw_new();
-    if (!new) 
-    {
-        return NULL;
-    }
-    if (quad_load(&new->quad.value, dataptr) < 0) 
-    {
-        Py_DECREF(new);
-        char error_msg[100];
-        snprintf(error_msg, sizeof(error_msg), "Invalid memory location %p", (void*)dataptr);
-        PyErr_SetString(PyExc_ValueError, error_msg);
-        return NULL;
-    }
-    return (PyObject *)new;
-}
-
-static PyArray_Descr *quadprec_default_descr(PyArray_DTypeMeta *NPY_UNUSED(cls))
-{
-    return (PyArray_Descr *)new_quaddtype_instance();
-}
-
-static PyType_Slot QuadPrecDType_Slots[] = 
-{
-    {NPY_DT_ensure_canonical, &ensure_canonical},
-    {NPY_DT_common_instance, &common_instance},
-    {NPY_DT_common_dtype, &common_dtype},
-    {NPY_DT_discover_descr_from_pyobject, &quadprec_discover_descriptor_from_pyobject},
-    {NPY_DT_setitem, &quadprec_setitem},
-    {NPY_DT_getitem, &quadprec_getitem},
-    {NPY_DT_default_descr, &quadprec_default_descr},
-    {0, NULL}
-};
-
-
-static PyObject * QuadPrecDType_new(PyTypeObject *NPY_UNUSED(cls), PyObject *args, PyObject *kwds)
-{
-    if (PyTuple_GET_SIZE(args) != 0 || (kwds != NULL && PyDict_Size(kwds) != 0)) {
-        PyErr_SetString(PyExc_TypeError,
-                        "QuadPrecDType takes no arguments");
-        return NULL;
-    }
-
-    return (PyObject *)new_quaddtype_instance();
-}
-
-static PyObject * QuadPrecDType_repr(QuadPrecDTypeObject *self)
-{
-    return PyUnicode_FromString("QuadPrecDType()");
-}
-
-PyArray_DTypeMeta QuadPrecDType = {
-    {{
-        PyVarObject_HEAD_INIT(NULL, 0)
-        .tp_name = "QuadPrecDType.QuadPrecDType",
-        .tp_basicsize = sizeof(QuadPrecDTypeObject),
-        .tp_new = QuadPrecDType_new,
-        .tp_repr = (reprfunc)QuadPrecDType_repr,
-        .tp_str = (reprfunc)QuadPrecDType_repr,
-    }},
-};
-
-int init_quadprec_dtype(void)
-{
-    PyArrayMethod_Spec **casts = init_casts();
-    if (!casts)
-        return -1;
-
-    PyArrayDTypeMeta_Spec QuadPrecDType_DTypeSpec = {
-        .flags = NPY_DT_NUMERIC,
-        .casts = casts,
-        .typeobj = &QuadPrecision_Type,
-        .slots = QuadPrecDType_Slots,
-    };
-
-    ((PyObject *)&QuadPrecDType)->ob_type = &PyArrayDTypeMeta_Type;
-
-    ((PyTypeObject *)&QuadPrecDType)->tp_base = &PyArrayDescr_Type;
-
-    if (PyType_Ready((PyTypeObject *)&QuadPrecDType) < 0) 
-    {
-        return -1;
-    }
-
-    if (PyArrayInitDTypeMeta_FromSpec(&QuadPrecDType, &QuadPrecDType_DTypeSpec) < 0)
-    {
-        return -1;
-    }
-
-    free_casts();
-    
-    return 0;
-}
\ No newline at end of file
diff --git a/quaddtype/quaddtype/src/scalar.c b/quaddtype/quaddtype/src/scalar.c
deleted file mode 100644
index 15f727d8..00000000
--- a/quaddtype/quaddtype/src/scalar.c
+++ /dev/null
@@ -1,126 +0,0 @@
-#include<Python.h>
-#include<sleef.h>
-#include<sleefquad.h>
-
-#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API
-#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION
-#define NO_IMPORT_ARRAY
-
-#include "numpy/arrayobject.h"
-#include "numpy/ndarraytypes.h"
-#include "numpy/dtype_api.h"
-
-#include "scalar.h"
-#include "scalar_ops.h"
-
-
-// static PyTypeObject QuadPrecision_Type;
-
-
-QuadPrecisionObject * QuadPrecision_raw_new(void)
-{
-    QuadPrecisionObject * new = PyObject_New(QuadPrecisionObject, &QuadPrecision_Type);
-    if(!new)
-        return NULL;
-    new->quad.value = Sleef_cast_from_doubleq1(0.0); // initialize to 0
-    return new;
-}
-
-QuadPrecisionObject * QuadPrecision_from_object(PyObject * value)
-{
-    QuadPrecisionObject *self = QuadPrecision_raw_new();
-    if(!self)
-        return NULL;
-    
-    if(PyFloat_Check(value))
-        self->quad.value = Sleef_cast_from_doubleq1(PyFloat_AsDouble(value));
-    
-    else if(PyUnicode_CheckExact(value))
-    {
-        const char * s = PyUnicode_AsUTF8(value);
-        char *endptr = NULL;
-        self->quad.value = Sleef_strtoq(s, &endptr);
-        if (*endptr != '\0' || endptr == s)
-        {
-            PyErr_SetString(PyExc_ValueError, "Unable to parse string to QuadPrecision");
-            Py_DECREF(self);
-            return NULL;
-        }
-    }
-    else if(PyLong_Check(value))
-    {
-        long int val = PyLong_AsLong(value);
-        if (val == -1)
-        {
-            PyErr_SetString(PyExc_OverflowError, "Overflow Error, value out of range");
-            Py_DECREF(self);
-            return NULL;
-        }
-        self->quad.value = Sleef_cast_from_int64q1(val);
-    }
-    else
-    {
-        PyErr_SetString(PyExc_TypeError, "QuadPrecision value must be a float, int or string");
-        Py_DECREF(self);
-        return NULL;
-    }
-
-    return self;
-}
-
-static PyObject *
-QuadPrecision_new(PyTypeObject *cls, PyObject *args, PyObject *kwargs)
-{
-    PyObject *value;
-
-    if (!PyArg_ParseTuple(args, "O", &value)) {
-        return NULL;
-    }
-
-    return (PyObject *)QuadPrecision_from_object(value);
-}
-
-static PyObject * QuadPrecision_str(QuadPrecisionObject * self)
-{
-    char buffer[128];
-    Sleef_snprintf(buffer, sizeof(buffer), "%.*Qe", SLEEF_QUAD_DIG, self->quad.value);
-    return PyUnicode_FromString(buffer);
-}
-
-static PyObject * QuadPrecision_repr(QuadPrecisionObject* self)
-{
-    PyObject *str = QuadPrecision_str(self);
-    if (str == NULL) {
-        return NULL;
-    }
-    PyObject *res = PyUnicode_FromFormat("QuadPrecision('%S')", str);
-    Py_DECREF(str);
-    return res;
-}
-
-static void
-quad_dealloc(QuadPrecDTypeObject *self)
-{
-    Py_TYPE(self)->tp_free((PyObject *)self);
-}
-
-PyTypeObject QuadPrecision_Type = 
-{
-    PyVarObject_HEAD_INIT(NULL, 0)
-    .tp_name = "QuadPrecType.QuadPrecision",
-    .tp_basicsize = sizeof(QuadPrecisionObject),
-    .tp_itemsize = 0,
-    .tp_new = QuadPrecision_new,
-    .tp_dealloc = (destructor)quad_dealloc,
-    .tp_repr = (reprfunc)QuadPrecision_repr,
-    .tp_str = (reprfunc)QuadPrecision_str,
-    .tp_as_number = &quad_as_scalar,
-    .tp_richcompare = (richcmpfunc)quad_richcompare
-
-};
-
-int
-init_quadprecision_scalar(void)
-{
-    return PyType_Ready(&QuadPrecision_Type);
-}
\ No newline at end of file
diff --git a/quaddtype/reinstall.sh b/quaddtype/reinstall.sh
index 31479511..afec2b59 100755
--- a/quaddtype/reinstall.sh
+++ b/quaddtype/reinstall.sh
@@ -8,6 +8,6 @@ then
 fi
 
 #meson setup build -Db_sanitize=address,undefined
-python -m pip uninstall -y quaddtype
+python -m pip uninstall -y quaddtype numpy_quaddtype
 python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v' -Csetup-args="-Dbuildtype=debug"
 #python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v'
\ No newline at end of file

From 8408227b4fe44c309c330fcafd349c1e209a8176 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Sun, 1 Sep 2024 04:17:31 +0530
Subject: [PATCH 03/32] added long double backend support to casting

---
 quaddtype/numpy_quaddtype/src/casts.cpp     | 860 ++++++++++++--------
 quaddtype/numpy_quaddtype/src/dtype.c       |   2 +-
 quaddtype/numpy_quaddtype/src/quad_common.h |   2 +-
 quaddtype/numpy_quaddtype/src/scalar.c      |  25 +-
 4 files changed, 546 insertions(+), 343 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp
index 44c316e0..473fd9af 100644
--- a/quaddtype/numpy_quaddtype/src/casts.cpp
+++ b/quaddtype/numpy_quaddtype/src/casts.cpp
@@ -56,12 +56,20 @@ quad_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[],
     npy_intp N = dimensions[0];
     char *in_ptr = data[0];
     char *out_ptr = data[1];
-
     npy_intp in_stride = strides[0];
     npy_intp out_stride = strides[1];
 
-    QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];
-    size_t elem_size = (descr->backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
+    QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)context->descriptors[0];
+    QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1];
+
+    if (descr_in->backend != descr_out->backend) {
+        PyErr_SetString(PyExc_TypeError,
+                        "Cannot convert between different quad-precision backends");
+        return -1;
+    }
+
+    size_t elem_size =
+            (descr_in->backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
 
     while (N--) {
         memcpy(out_ptr, in_ptr, elem_size);
@@ -73,266 +81,442 @@ quad_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[],
 
 // Casting from other types to QuadDType
 
-// template <typename T>
-// static inline Sleef_quad
-// to_quad(T x);
-
-// template <>
-// inline Sleef_quad
-// to_quad<npy_bool>(npy_bool x)
-// {
-//     return x ? Sleef_cast_from_doubleq1(1.0) : Sleef_cast_from_doubleq1(0.0);
-// }
-// template <>
-// inline Sleef_quad
-// to_quad<npy_byte>(npy_byte x)
-// {
-//     return Sleef_cast_from_int64q1(x);
-// }
-// // template <>
-// // inline Sleef_quad
-// // to_quad<npy_ubyte>(npy_ubyte x)
-// // {
-// //     return Sleef_cast_from_uint64q1(x);
-// // }
-// template <>
-// inline Sleef_quad
-// to_quad<npy_short>(npy_short x)
-// {
-//     return Sleef_cast_from_int64q1(x);
-// }
-// template <>
-// inline Sleef_quad
-// to_quad<npy_ushort>(npy_ushort x)
-// {
-//     return Sleef_cast_from_uint64q1(x);
-// }
-// template <>
-// inline Sleef_quad
-// to_quad<npy_int>(npy_int x)
-// {
-//     return Sleef_cast_from_int64q1(x);
-// }
-// template <>
-// inline Sleef_quad
-// to_quad<npy_uint>(npy_uint x)
-// {
-//     return Sleef_cast_from_uint64q1(x);
-// }
-// template <>
-// inline Sleef_quad
-// to_quad<npy_long>(npy_long x)
-// {
-//     return Sleef_cast_from_int64q1(x);
-// }
-// template <>
-// inline Sleef_quad
-// to_quad<npy_ulong>(npy_ulong x)
-// {
-//     return Sleef_cast_from_uint64q1(x);
-// }
-// template <>
-// inline Sleef_quad
-// to_quad<npy_longlong>(npy_longlong x)
-// {
-//     return Sleef_cast_from_int64q1(x);
-// }
-// template <>
-// inline Sleef_quad
-// to_quad<npy_ulonglong>(npy_ulonglong x)
-// {
-//     return Sleef_cast_from_uint64q1(x);
-// }
-// template <>
-// inline Sleef_quad
-// to_quad<float>(float x)
-// {
-//     return Sleef_cast_from_doubleq1(x);
-// }
-// template <>
-// inline Sleef_quad
-// to_quad<double>(double x)
-// {
-//     return Sleef_cast_from_doubleq1(x);
-// }
-// template <>
-// inline Sleef_quad
-// to_quad<long double>(long double x)
-// {
-//     return Sleef_cast_from_doubleq1(x);
-// }
-
-// template <typename T>
-// static NPY_CASTING
-// numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2],
-//                                   PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2],
-//                                   npy_intp *view_offset)
-// {
-//     if (given_descrs[1] == NULL) {
-//         loop_descrs[1] = (PyArray_Descr *)new_quaddtype_instance();
-//         if (loop_descrs[1] == nullptr) {
-//             return (NPY_CASTING)-1;
-//         }
-//     }
-//     else {
-//         Py_INCREF(given_descrs[1]);
-//         loop_descrs[1] = given_descrs[1];
-//     }
-
-//     loop_descrs[0] = PyArray_GetDefaultDescr(dtypes[0]);
-//     // *view_offset = 0;
-//     return NPY_SAFE_CASTING;
-// }
-
-// template <typename T>
-// static int
-// numpy_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[],
-//                            npy_intp const dimensions[], npy_intp const strides[],
-//                            void *NPY_UNUSED(auxdata))
-// {
-//     npy_intp N = dimensions[0];
-//     char *in_ptr = data[0];
-//     char *out_ptr = data[1];
-
-//     while (N--) {
-//         T in_val;
-//         Sleef_quad out_val;
-
-//         memcpy(&in_val, in_ptr, sizeof(T));
-//         out_val = to_quad<T>(in_val);
-//         memcpy(out_ptr, &out_val, sizeof(Sleef_quad));
-
-//         in_ptr += strides[0];
-//         out_ptr += strides[1];
-//     }
-//     return 0;
-// }
-
-// // Casting from QuadDType to other types
-
-// template <typename T>
-// static inline T
-// from_quad(Sleef_quad x);
-
-// template <>
-// inline npy_bool
-// from_quad<npy_bool>(Sleef_quad x)
-// {
-//     return Sleef_cast_to_int64q1(x) != 0;
-// }
-// template <>
-// inline npy_byte
-// from_quad<npy_byte>(Sleef_quad x)
-// {
-//     return (npy_byte)Sleef_cast_to_int64q1(x);
-// }
-// // template <>
-// // inline npy_ubyte
-// // from_quad<npy_ubyte>(Sleef_quad x)
-// // {
-// //     return (npy_ubyte)Sleef_cast_to_uint64q1(x);
-// // }
-// template <>
-// inline npy_short
-// from_quad<npy_short>(Sleef_quad x)
-// {
-//     return (npy_short)Sleef_cast_to_int64q1(x);
-// }
-// template <>
-// inline npy_ushort
-// from_quad<npy_ushort>(Sleef_quad x)
-// {
-//     return (npy_ushort)Sleef_cast_to_uint64q1(x);
-// }
-// template <>
-// inline npy_int
-// from_quad<npy_int>(Sleef_quad x)
-// {
-//     return (npy_int)Sleef_cast_to_int64q1(x);
-// }
-// template <>
-// inline npy_uint
-// from_quad<npy_uint>(Sleef_quad x)
-// {
-//     return (npy_uint)Sleef_cast_to_uint64q1(x);
-// }
-// template <>
-// inline npy_long
-// from_quad<npy_long>(Sleef_quad x)
-// {
-//     return (npy_long)Sleef_cast_to_int64q1(x);
-// }
-// template <>
-// inline npy_ulong
-// from_quad<npy_ulong>(Sleef_quad x)
-// {
-//     return (npy_ulong)Sleef_cast_to_uint64q1(x);
-// }
-// template <>
-// inline npy_longlong
-// from_quad<npy_longlong>(Sleef_quad x)
-// {
-//     return Sleef_cast_to_int64q1(x);
-// }
-// template <>
-// inline npy_ulonglong
-// from_quad<npy_ulonglong>(Sleef_quad x)
-// {
-//     return Sleef_cast_to_uint64q1(x);
-// }
-// template <>
-// inline float
-// from_quad<float>(Sleef_quad x)
-// {
-//     return Sleef_cast_to_doubleq1(x);
-// }
-// template <>
-// inline double
-// from_quad<double>(Sleef_quad x)
-// {
-//     return Sleef_cast_to_doubleq1(x);
-// }
-// template <>
-// inline long double
-// from_quad<long double>(Sleef_quad x)
-// {
-//     return Sleef_cast_to_doubleq1(x);
-// }
-
-// template <typename T>
-// static NPY_CASTING
-// quad_to_numpy_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2],
-//                                   PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2],
-//                                   npy_intp *view_offset)
-// {
-//     Py_INCREF(given_descrs[0]);
-//     loop_descrs[0] = given_descrs[0];
-
-//     loop_descrs[1] = PyArray_GetDefaultDescr(dtypes[1]);
-//     // *view_offset = 0;
-//     return NPY_UNSAFE_CASTING;
-// }
-
-// template <typename T>
-// static int
-// quad_to_numpy_strided_loop(PyArrayMethod_Context *context, char *const data[],
-//                            npy_intp const dimensions[], npy_intp const strides[],
-//                            void *NPY_UNUSED(auxdata))
-// {
-//     npy_intp N = dimensions[0];
-//     char *in_ptr = data[0];
-//     char *out_ptr = data[1];
-
-//     while (N--) {
-//         Sleef_quad in_val = *(Sleef_quad *)in_ptr;
-//         T *out_val = (T *)out_ptr;
-//         *out_val = from_quad<T>(in_val);
-
-//         in_ptr += strides[0];
-//         out_ptr += strides[1];
-//     }
-//     return 0;
-// }
+template <typename T>
+static inline quad_value
+to_quad(T x, QuadBackendType backend);
+
+template <>
+inline quad_value
+to_quad<npy_bool>(npy_bool x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = x ? Sleef_cast_from_doubleq1(1.0) : Sleef_cast_from_doubleq1(0.0);
+    }
+    else {
+        result.longdouble_value = x ? 1.0L : 0.0L;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad<npy_byte>(npy_byte x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_int64q1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad<npy_short>(npy_short x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_int64q1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad<npy_ushort>(npy_ushort x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_uint64q1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad<npy_int>(npy_int x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_int64q1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad<npy_uint>(npy_uint x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_uint64q1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad<npy_long>(npy_long x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_int64q1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad<npy_ulong>(npy_ulong x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_uint64q1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad<npy_longlong>(npy_longlong x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_int64q1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad<npy_ulonglong>(npy_ulonglong x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_uint64q1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+template <>
+inline quad_value
+to_quad<float>(float x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_doubleq1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad<double>(double x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_doubleq1(x);
+    }
+    else {
+        result.longdouble_value = (long double)x;
+    }
+    return result;
+}
+
+template <>
+inline quad_value
+to_quad<long double>(long double x, QuadBackendType backend)
+{
+    quad_value result;
+    if (backend == BACKEND_SLEEF) {
+        result.sleef_value = Sleef_cast_from_doubleq1(x);
+    }
+    else {
+        result.longdouble_value = x;
+    }
+    return result;
+}
+
+template <typename T>
+static NPY_CASTING
+numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2],
+                                  PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2],
+                                  npy_intp *view_offset)
+{
+    if (given_descrs[1] == NULL) {
+        loop_descrs[1] = (PyArray_Descr *)new_quaddtype_instance(BACKEND_SLEEF);
+        if (loop_descrs[1] == nullptr) {
+            return (NPY_CASTING)-1;
+        }
+    }
+    else {
+        Py_INCREF(given_descrs[1]);
+        loop_descrs[1] = given_descrs[1];
+    }
+
+    loop_descrs[0] = PyArray_GetDefaultDescr(dtypes[0]);
+    return NPY_SAFE_CASTING;
+}
+
+template <typename T>
+static int
+numpy_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[],
+                           npy_intp const dimensions[], npy_intp const strides[],
+                           void *NPY_UNUSED(auxdata))
+{
+    npy_intp N = dimensions[0];
+    char *in_ptr = data[0];
+    char *out_ptr = data[1];
+
+    QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1];
+    QuadBackendType backend = descr_out->backend;
+    size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
+
+    while (N--) {
+        T in_val;
+        quad_value out_val;
+
+        memcpy(&in_val, in_ptr, sizeof(T));
+        out_val = to_quad<T>(in_val, backend);
+        memcpy(out_ptr, &out_val, elem_size);
+
+        in_ptr += strides[0];
+        out_ptr += strides[1];
+    }
+    return 0;
+}
+
+// Casting from QuadDType to other types
+
+template <typename T>
+static inline T
+from_quad(quad_value x, QuadBackendType backend);
+
+template <>
+inline npy_bool
+from_quad<npy_bool>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return Sleef_cast_to_int64q1(x.sleef_value) != 0;
+    }
+    else {
+        return x.longdouble_value != 0;
+    }
+}
+
+template <>
+inline npy_byte
+from_quad<npy_byte>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return (npy_byte)Sleef_cast_to_int64q1(x.sleef_value);
+    }
+    else {
+        return (npy_byte)x.longdouble_value;
+    }
+}
+
+template <>
+inline npy_short
+from_quad<npy_short>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return (npy_short)Sleef_cast_to_int64q1(x.sleef_value);
+    }
+    else {
+        return (npy_short)x.longdouble_value;
+    }
+}
+
+template <>
+inline npy_ushort
+from_quad<npy_ushort>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return (npy_ushort)Sleef_cast_to_uint64q1(x.sleef_value);
+    }
+    else {
+        return (npy_ushort)x.longdouble_value;
+    }
+}
+
+template <>
+inline npy_int
+from_quad<npy_int>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return (npy_int)Sleef_cast_to_int64q1(x.sleef_value);
+    }
+    else {
+        return (npy_int)x.longdouble_value;
+    }
+}
+
+template <>
+inline npy_uint
+from_quad<npy_uint>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return (npy_uint)Sleef_cast_to_uint64q1(x.sleef_value);
+    }
+    else {
+        return (npy_uint)x.longdouble_value;
+    }
+}
+
+template <>
+inline npy_long
+from_quad<npy_long>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return (npy_long)Sleef_cast_to_int64q1(x.sleef_value);
+    }
+    else {
+        return (npy_long)x.longdouble_value;
+    }
+}
+
+template <>
+inline npy_ulong
+from_quad<npy_ulong>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return (npy_ulong)Sleef_cast_to_uint64q1(x.sleef_value);
+    }
+    else {
+        return (npy_ulong)x.longdouble_value;
+    }
+}
+
+template <>
+inline npy_longlong
+from_quad<npy_longlong>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return Sleef_cast_to_int64q1(x.sleef_value);
+    }
+    else {
+        return (npy_longlong)x.longdouble_value;
+    }
+}
+
+template <>
+inline npy_ulonglong
+from_quad<npy_ulonglong>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return Sleef_cast_to_uint64q1(x.sleef_value);
+    }
+    else {
+        return (npy_ulonglong)x.longdouble_value;
+    }
+}
+
+template <>
+inline float
+from_quad<float>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return (float)Sleef_cast_to_doubleq1(x.sleef_value);
+    }
+    else {
+        return (float)x.longdouble_value;
+    }
+}
+
+template <>
+inline double
+from_quad<double>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return Sleef_cast_to_doubleq1(x.sleef_value);
+    }
+    else {
+        return (double)x.longdouble_value;
+    }
+}
+
+template <>
+inline long double
+from_quad<long double>(quad_value x, QuadBackendType backend)
+{
+    if (backend == BACKEND_SLEEF) {
+        return (long double)Sleef_cast_to_doubleq1(x.sleef_value);
+    }
+    else {
+        return x.longdouble_value;
+    }
+}
+
+template <typename T>
+static NPY_CASTING
+quad_to_numpy_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2],
+                                  PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2],
+                                  npy_intp *view_offset)
+{
+    Py_INCREF(given_descrs[0]);
+    loop_descrs[0] = given_descrs[0];
+
+    loop_descrs[1] = PyArray_GetDefaultDescr(dtypes[1]);
+    return NPY_UNSAFE_CASTING;
+}
+
+template <typename T>
+static int
+quad_to_numpy_strided_loop(PyArrayMethod_Context *context, char *const data[],
+                           npy_intp const dimensions[], npy_intp const strides[],
+                           void *NPY_UNUSED(auxdata))
+{
+    npy_intp N = dimensions[0];
+    char *in_ptr = data[0];
+    char *out_ptr = data[1];
+
+    QuadPrecDTypeObject *quad_descr = (QuadPrecDTypeObject *)context->descriptors[0];
+    QuadBackendType backend = quad_descr->backend;
+
+    size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
+
+    while (N--) {
+        quad_value in_val;
+        memcpy(&in_val, in_ptr, elem_size);
+
+        T out_val = from_quad<T>(in_val, backend);
+        memcpy(out_ptr, &out_val, sizeof(T));
+
+        in_ptr += strides[0];
+        out_ptr += strides[1];
+    }
+    return 0;
+}
 
 static PyArrayMethod_Spec *specs[NUM_CASTS + 1];  // +1 for NULL terminator
 static size_t spec_count = 0;
@@ -345,53 +529,53 @@ add_spec(PyArrayMethod_Spec *spec)
     }
 }
 
-// // functions to add casts
-// template <typename T>
-// void
-// add_cast_from(PyArray_DTypeMeta *to)
-// {
-//     PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{nullptr, to};
-
-//     PyType_Slot *slots = new PyType_Slot[3]{
-//             {NPY_METH_resolve_descriptors, (void *)&quad_to_numpy_resolve_descriptors<T>},
-//             {NPY_METH_strided_loop, (void *)&quad_to_numpy_strided_loop<T>},
-//             {0, nullptr}};
-
-//     PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{
-//             .name = "cast_QuadPrec_to_NumPy",
-//             .nin = 1,
-//             .nout = 1,
-//             .casting = NPY_UNSAFE_CASTING,
-//             .flags = (NPY_ARRAYMETHOD_FLAGS)0,
-//             .dtypes = dtypes,
-//             .slots = slots,
-//     };
-//     add_spec(spec);
-// }
-
-// template <typename T>
-// void
-// add_cast_to(PyArray_DTypeMeta *from)
-// {
-//     PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{from, nullptr};
-
-//     PyType_Slot *slots = new PyType_Slot[3]{
-//             {NPY_METH_resolve_descriptors, (void *)&numpy_to_quad_resolve_descriptors<T>},
-//             {NPY_METH_strided_loop, (void *)&numpy_to_quad_strided_loop<T>},
-//             {0, nullptr}};
-
-//     PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{
-//             .name = "cast_NumPy_to_QuadPrec",
-//             .nin = 1,
-//             .nout = 1,
-//             .casting = NPY_SAFE_CASTING,
-//             .flags = (NPY_ARRAYMETHOD_FLAGS)0,
-//             .dtypes = dtypes,
-//             .slots = slots,
-//     };
-
-//     add_spec(spec);
-// }
+// functions to add casts
+template <typename T>
+void
+add_cast_from(PyArray_DTypeMeta *to)
+{
+    PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{&QuadPrecDType, to};
+
+    PyType_Slot *slots = new PyType_Slot[3]{
+            {NPY_METH_resolve_descriptors, (void *)&quad_to_numpy_resolve_descriptors<T>},
+            {NPY_METH_strided_loop, (void *)&quad_to_numpy_strided_loop<T>},
+            {0, nullptr}};
+
+    PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{
+            .name = "cast_QuadPrec_to_NumPy",
+            .nin = 1,
+            .nout = 1,
+            .casting = NPY_UNSAFE_CASTING,
+            .flags = (NPY_ARRAYMETHOD_FLAGS)0,
+            .dtypes = dtypes,
+            .slots = slots,
+    };
+    add_spec(spec);
+}
+
+template <typename T>
+void
+add_cast_to(PyArray_DTypeMeta *from)
+{
+    PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{from, &QuadPrecDType};
+
+    PyType_Slot *slots = new PyType_Slot[3]{
+            {NPY_METH_resolve_descriptors, (void *)&numpy_to_quad_resolve_descriptors<T>},
+            {NPY_METH_strided_loop, (void *)&numpy_to_quad_strided_loop<T>},
+            {0, nullptr}};
+
+    PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{
+            .name = "cast_NumPy_to_QuadPrec",
+            .nin = 1,
+            .nout = 1,
+            .casting = NPY_SAFE_CASTING,
+            .flags = (NPY_ARRAYMETHOD_FLAGS)0,
+            .dtypes = dtypes,
+            .slots = slots,
+    };
+
+    add_spec(spec);
+}
 
 PyArrayMethod_Spec **
 init_casts_internal(void)
@@ -416,35 +600,33 @@ init_casts_internal(void)
 
     add_spec(quad2quad_spec);
 
-    // add_cast_to<npy_bool>(&PyArray_BoolDType);
-    // add_cast_to<npy_byte>(&PyArray_ByteDType);
-    // add_cast_to<npy_ubyte>(&PyArray_UByteDType);
-    // add_cast_to<npy_short>(&PyArray_ShortDType);
-    // add_cast_to<npy_ushort>(&PyArray_UShortDType);
-    // add_cast_to<npy_int>(&PyArray_IntDType);
-    // add_cast_to<npy_uint>(&PyArray_UIntDType);
-    // add_cast_to<npy_long>(&PyArray_LongDType);
-    // add_cast_to<npy_ulong>(&PyArray_ULongDType);
-    // add_cast_to<npy_longlong>(&PyArray_LongLongDType);
-    // add_cast_to<npy_ulonglong>(&PyArray_ULongLongDType);
-    // add_cast_to<float>(&PyArray_FloatDType);
-    // add_cast_to<double>(&PyArray_DoubleDType);
-    // add_cast_to<long double>(&PyArray_LongDoubleDType);
-
-    // add_cast_from<npy_bool>(&PyArray_BoolDType);
-    // add_cast_from<npy_byte>(&PyArray_ByteDType);
-    // add_cast_from<npy_ubyte>(&PyArray_UByteDType);
-    // add_cast_from<npy_short>(&PyArray_ShortDType);
-    // add_cast_from<npy_ushort>(&PyArray_UShortDType);
-    // add_cast_from<npy_int>(&PyArray_IntDType);
-    // add_cast_from<npy_uint>(&PyArray_UIntDType);
-    // add_cast_from<npy_long>(&PyArray_LongDType);
-    // add_cast_from<npy_ulong>(&PyArray_ULongDType);
-    // add_cast_from<npy_longlong>(&PyArray_LongLongDType);
-    // add_cast_from<npy_ulonglong>(&PyArray_ULongLongDType);
-    // add_cast_from<float>(&PyArray_FloatDType);
-    // add_cast_from<double>(&PyArray_DoubleDType);
-    // add_cast_from<long double>(&PyArray_LongDoubleDType);
+    add_cast_to<npy_bool>(&PyArray_BoolDType);
+    add_cast_to<npy_byte>(&PyArray_ByteDType);
+    add_cast_to<npy_short>(&PyArray_ShortDType);
+    add_cast_to<npy_ushort>(&PyArray_UShortDType);
+    add_cast_to<npy_int>(&PyArray_IntDType);
+    add_cast_to<npy_uint>(&PyArray_UIntDType);
+    add_cast_to<npy_long>(&PyArray_LongDType);
+    add_cast_to<npy_ulong>(&PyArray_ULongDType);
+    add_cast_to<npy_longlong>(&PyArray_LongLongDType);
+    add_cast_to<npy_ulonglong>(&PyArray_ULongLongDType);
+    add_cast_to<float>(&PyArray_FloatDType);
+    add_cast_to<double>(&PyArray_DoubleDType);
+    add_cast_to<long double>(&PyArray_LongDoubleDType);
+
+    add_cast_from<npy_bool>(&PyArray_BoolDType);
+    add_cast_from<npy_byte>(&PyArray_ByteDType);
+    add_cast_from<npy_short>(&PyArray_ShortDType);
+    add_cast_from<npy_ushort>(&PyArray_UShortDType);
+    add_cast_from<npy_int>(&PyArray_IntDType);
+    add_cast_from<npy_uint>(&PyArray_UIntDType);
+    add_cast_from<npy_long>(&PyArray_LongDType);
+    add_cast_from<npy_ulong>(&PyArray_ULongDType);
+    add_cast_from<npy_longlong>(&PyArray_LongLongDType);
+    add_cast_from<npy_ulonglong>(&PyArray_ULongLongDType);
+    add_cast_from<float>(&PyArray_FloatDType);
+    add_cast_from<double>(&PyArray_DoubleDType);
+    add_cast_from<long double>(&PyArray_LongDoubleDType);
 
     specs[spec_count] = nullptr;
     return specs;
diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c
index a00c37c9..9e496dd8 100644
--- a/quaddtype/numpy_quaddtype/src/dtype.c
+++ b/quaddtype/numpy_quaddtype/src/dtype.c
@@ -201,7 +201,7 @@ QuadPrecDType_repr(QuadPrecDTypeObject *self)
 
 PyArray_DTypeMeta QuadPrecDType = {
         {{
-                PyVarObject_HEAD_INIT(NULL, 0).tp_name = "QuadPrecDType.QuadPrecDType",
+                PyVarObject_HEAD_INIT(NULL, 0).tp_name = "numpy_quaddtype.QuadPrecDType",
                 .tp_basicsize = sizeof(QuadPrecDTypeObject),
                 .tp_new = QuadPrecDType_new,
                 .tp_repr = (reprfunc)QuadPrecDType_repr,
diff --git a/quaddtype/numpy_quaddtype/src/quad_common.h b/quaddtype/numpy_quaddtype/src/quad_common.h
index fcdf6f2b..190573af 100644
--- a/quaddtype/numpy_quaddtype/src/quad_common.h
+++ b/quaddtype/numpy_quaddtype/src/quad_common.h
@@ -6,7 +6,7 @@ extern "C" {
 #endif
 
 typedef enum {
-    BACKEND_SLEEF = 0,
+    BACKEND_SLEEF,
     BACKEND_LONGDOUBLE
 } QuadBackendType;
 
diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c
index a3101286..8c267d42 100644
--- a/quaddtype/numpy_quaddtype/src/scalar.c
+++ b/quaddtype/numpy_quaddtype/src/scalar.c
@@ -75,8 +75,29 @@ QuadPrecision_from_object(PyObject *value, QuadBackendType backend)
             self->value.longdouble_value = (long double)val;
         }
     }
-    else {
-        PyErr_SetString(PyExc_TypeError, "QuadPrecision value must be a float, int or string");
+    else 
+    {
+        PyObject *type_str = PyObject_Str((PyObject *)Py_TYPE(value));
+        if (type_str != NULL) {
+            const char *type_cstr = PyUnicode_AsUTF8(type_str);
+            if (type_cstr != NULL) {
+                PyErr_Format(
+                        PyExc_TypeError,
+                        "QuadPrecision value must be a float, int or string, but got %s instead",
+                        type_cstr);
+            }
+            else {
+                PyErr_SetString(PyExc_TypeError,
+                                "QuadPrecision value must be a float, int or string, but got an "
+                                "unknown type instead");
+            }
+            Py_DECREF(type_str);
+        }
+        else {
+            PyErr_SetString(PyExc_TypeError,
+                            "QuadPrecision value must be a float, int or string, but got an "
+                            "unknown type instead");
+        }
         Py_DECREF(self);
         return NULL;
     }

From a652985a16316cf6f56154c4af345133a4239140 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Sun, 1 Sep 2024 12:34:59 +0530
Subject: [PATCH 04/32] added multi backend support to scalar operations

---
 quaddtype/meson.build                        |  10 +-
 quaddtype/numpy_quaddtype/src/ops.hpp        | 220 ++++++++++++++++++-
 quaddtype/numpy_quaddtype/src/scalar.c       |   6 +-
 quaddtype/numpy_quaddtype/src/scalar_ops.cpp | 169 +++++++++-----
 quaddtype/reinstall.sh                       |   2 +-
 5 files changed, 332 insertions(+), 75 deletions(-)

diff --git a/quaddtype/meson.build b/quaddtype/meson.build
index 50dc67b1..1b9f6248 100644
--- a/quaddtype/meson.build
+++ b/quaddtype/meson.build
@@ -5,8 +5,8 @@ py = py_mod.find_installation()
 
 c = meson.get_compiler('c')
 
-sleef_dep = c.find_library('sleef', dirs:['/usr/local/lib'])
-sleefquad_dep = c.find_library('sleefquad', dirs:['/usr/local/lib'])
+sleef_dep = c.find_library('sleef')
+sleefquad_dep = c.find_library('sleefquad')
 
 incdir_numpy = run_command(py,
   [
@@ -31,9 +31,9 @@ srcs = [
     'numpy_quaddtype/src/dtype.h',
     'numpy_quaddtype/src/dtype.c',
     'numpy_quaddtype/src/quaddtype_main.c',
-    # 'numpy_quaddtype/src/scalar_ops.h',
-    # 'numpy_quaddtype/src/scalar_ops.cpp',
-    # 'numpy_quaddtype/src/ops.hpp',
+    'numpy_quaddtype/src/scalar_ops.h',
+    'numpy_quaddtype/src/scalar_ops.cpp',
+    'numpy_quaddtype/src/ops.hpp',
     # 'numpy_quaddtype/src/umath.h',
     # 'numpy_quaddtype/src/umath.cpp'
 ]
diff --git a/quaddtype/numpy_quaddtype/src/ops.hpp b/quaddtype/numpy_quaddtype/src/ops.hpp
index 6a3511c4..3e290da6 100644
--- a/quaddtype/numpy_quaddtype/src/ops.hpp
+++ b/quaddtype/numpy_quaddtype/src/ops.hpp
@@ -1,9 +1,11 @@
 #include <sleef.h>
 #include <sleefquad.h>
+#include <cmath>
 
-typedef int (*unary_op_def)(Sleef_quad *, Sleef_quad *);
+// Unary Quad Operations
+typedef int (*unary_op_quad_def)(Sleef_quad *, Sleef_quad *);
 
-static inline int
+static int
 quad_negative(Sleef_quad *op, Sleef_quad *out)
 {
     *out = Sleef_negq1(*op);
@@ -108,8 +110,116 @@ quad_exp2(Sleef_quad *op, Sleef_quad *out)
     return 0;
 }
 
-// binary ops
-typedef int (*binop_def)(Sleef_quad *, Sleef_quad *, Sleef_quad *);
+// Unary long double operations
+typedef int (*unary_op_longdouble_def)(long double *, long double *);
+
+static int
+ld_negative(long double *op, long double *out)
+{
+    *out = -(*op);
+    return 0;
+}
+
+static int
+ld_positive(long double *op, long double *out)
+{
+    *out = *op;
+    return 0;
+}
+
+static inline int
+ld_absolute(long double *op, long double *out)
+{
+    *out = fabsl(*op);
+    return 0;
+}
+
+static inline int
+ld_rint(long double *op, long double *out)
+{
+    *out = rintl(*op);
+    return 0;
+}
+
+static inline int
+ld_trunc(long double *op, long double *out)
+{
+    *out = truncl(*op);
+    return 0;
+}
+
+static inline int
+ld_floor(long double *op, long double *out)
+{
+    *out = floorl(*op);
+    return 0;
+}
+
+static inline int
+ld_ceil(long double *op, long double *out)
+{
+    *out = ceill(*op);
+    return 0;
+}
+
+static inline int
+ld_sqrt(long double *op, long double *out)
+{
+    *out = sqrtl(*op);
+    return 0;
+}
+
+static inline int
+ld_square(long double *op, long double *out)
+{
+    *out = (*op) * (*op);
+    return 0;
+}
+
+static inline int
+ld_log(long double *op, long double *out)
+{
+    *out = logl(*op);
+    return 0;
+}
+
+static inline int
+ld_log2(long double *op, long double *out)
+{
+    *out = log2l(*op);
+    return 0;
+}
+
+static inline int
+ld_log10(long double *op, long double *out)
+{
+    *out = log10l(*op);
+    return 0;
+}
+
+static inline int
+ld_log1p(long double *op, long double *out)
+{
+    *out = log1pl(*op);
+    return 0;
+}
+
+static inline int
+ld_exp(long double *op, long double *out)
+{
+    *out = expl(*op);
+    return 0;
+}
+
+static inline int
+ld_exp2(long double *op, long double *out)
+{
+    *out = exp2l(*op);
+    return 0;
+}
+
+// Binary Quad operations
+typedef int (*binary_op_quad_def)(Sleef_quad *, Sleef_quad *, Sleef_quad *);
 
 static inline int
 quad_add(Sleef_quad *out, Sleef_quad *in1, Sleef_quad *in2)
@@ -167,8 +277,67 @@ quad_maximum(Sleef_quad *out, Sleef_quad *in1, Sleef_quad *in2)
     return 0;
 }
 
-// comparison functions
-typedef npy_bool (*cmp_def)(const Sleef_quad *, const Sleef_quad *);
+// Binary long double operations
+typedef int (*binary_op_longdouble_def)(long double *, long double *, long double *);
+
+static inline int
+ld_add(long double *out, long double *in1, long double *in2)
+{
+    *out = (*in1) + (*in2);
+    return 0;
+}
+
+static inline int
+ld_sub(long double *out, long double *in1, long double *in2)
+{
+    *out = (*in1) - (*in2);
+    return 0;
+}
+
+static inline int
+ld_mul(long double *res, long double *a, long double *b)
+{
+    *res = (*a) * (*b);
+    return 0;
+}
+
+static inline int
+ld_div(long double *res, long double *a, long double *b)
+{
+    *res = (*a) / (*b);
+    return 0;
+}
+
+static inline int
+ld_pow(long double *res, long double *a, long double *b)
+{
+    *res = powl(*a, *b);
+    return 0;
+}
+
+static inline int
+ld_mod(long double *res, long double *a, long double *b)
+{
+    *res = fmodl(*a, *b);
+    return 0;
+}
+
+static inline int
+ld_minimum(long double *out, long double *in1, long double *in2)
+{
+    *out = (*in1 < *in2) ? *in1 : *in2;
+    return 0;
+}
+
+static inline int
+ld_maximum(long double *out, long double *in1, long double *in2)
+{
+    *out = (*in1 > *in2) ? *in1 : *in2;
+    return 0;
+}
+
+// comparison quad functions
+typedef npy_bool (*cmp_quad_def)(const Sleef_quad *, const Sleef_quad *);
 
 static inline npy_bool
 quad_equal(const Sleef_quad *a, const Sleef_quad *b)
@@ -204,4 +373,43 @@ static inline npy_bool
 quad_greaterequal(const Sleef_quad *a, const Sleef_quad *b)
 {
     return Sleef_icmpgeq1(*a, *b);
+}
+
+// comparison quad functions
+typedef npy_bool (*cmp_londouble_def)(const long double *, const double *);
+
+static inline npy_bool
+ld_equal(const long double *a, const long double *b)
+{
+    return *a == *b;  // native long double equality; under IEEE 754 this is false when either side is NaN
+}
+
+static inline npy_bool
+ld_notequal(const long double *a, const long double *b)
+{
+    return *a != *b;  // native long double inequality; under IEEE 754 this is true when either side is NaN
+}
+
+static inline npy_bool
+ld_less(const long double *a, const long double *b)
+{
+    return *a < *b;  // strict less-than on the pointed-to values; false under IEEE 754 if either is NaN
+}
+
+static inline npy_bool
+ld_lessequal(const long double *a, const long double *b)
+{
+    return *a <= *b;  // less-than-or-equal on the pointed-to values; false under IEEE 754 if either is NaN
+}
+
+static inline npy_bool
+ld_greater(const long double *a, const long double *b)
+{
+    return *a > *b;  // strict greater-than on the pointed-to values; false under IEEE 754 if either is NaN
+}
+
+static inline npy_bool
+ld_greaterequal(const long double *a, const long double *b)
+{
+    return *a >= *b;  // greater-than-or-equal on the pointed-to values; false under IEEE 754 if either is NaN
 }
\ No newline at end of file
diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c
index 8c267d42..0b2967e5 100644
--- a/quaddtype/numpy_quaddtype/src/scalar.c
+++ b/quaddtype/numpy_quaddtype/src/scalar.c
@@ -12,7 +12,7 @@
 #include "numpy/dtype_api.h"
 
 #include "scalar.h"
-// #include "scalar_ops.h"
+#include "scalar_ops.h"
 
 QuadPrecisionObject *
 QuadPrecision_raw_new(QuadBackendType backend)
@@ -168,8 +168,8 @@ PyTypeObject QuadPrecision_Type = {
         .tp_dealloc = (destructor)QuadPrecision_dealloc,
         .tp_repr = (reprfunc)QuadPrecision_repr,
         .tp_str = (reprfunc)QuadPrecision_str,
-        // .tp_as_number = &quad_as_scalar,
-        // .tp_richcompare = (richcmpfunc)quad_richcompare
+        .tp_as_number = &quad_as_scalar,
+        .tp_richcompare = (richcmpfunc)quad_richcompare
 
 };
 
diff --git a/quaddtype/numpy_quaddtype/src/scalar_ops.cpp b/quaddtype/numpy_quaddtype/src/scalar_ops.cpp
index a51bc69a..156e4288 100644
--- a/quaddtype/numpy_quaddtype/src/scalar_ops.cpp
+++ b/quaddtype/numpy_quaddtype/src/scalar_ops.cpp
@@ -16,27 +16,38 @@ extern "C" {
 #include "scalar.h"
 #include "ops.hpp"
 #include "scalar_ops.h"
+#include "quad_common.h"
 
-template <unary_op_def unary_op>
+template <unary_op_quad_def sleef_op, unary_op_longdouble_def longdouble_op>
 static PyObject *
 quad_unary_func(QuadPrecisionObject *self)
 {
-    QuadPrecisionObject *res = QuadPrecision_raw_new();
+    QuadPrecisionObject *res = QuadPrecision_raw_new(self->backend);  // the result scalar uses the operand's backend
     if (!res) {
         return NULL;
     }
 
-    unary_op(&self->quad.value, &res->quad.value);
+    if (self->backend == BACKEND_SLEEF) {  // choose the implementation that matches how the value is stored
+        sleef_op(&self->value.sleef_value, &res->value.sleef_value);  // the op's int return value is not checked
+    }
+    else {  // every backend other than SLEEF is treated as long double
+        longdouble_op(&self->value.longdouble_value, &res->value.longdouble_value);  // the op's int return value is not checked
+    }
     return (PyObject *)res;
 }
 
 PyObject *
 quad_nonzero(QuadPrecisionObject *self)
 {
-    return PyBool_FromLong(Sleef_icmpneq1(self->quad.value, Sleef_cast_from_int64q1(0)));
+    if (self->backend == BACKEND_SLEEF) {  // NOTE(review): installed as nb_bool through an (inquiry) cast, yet inquiry returns int and this returns PyObject * -- verify
+        return PyBool_FromLong(Sleef_icmpneq1(self->value.sleef_value, Sleef_cast_from_int64q1(0)));  // compare against a quad zero
+    }
+    else {  // every backend other than SLEEF is treated as long double
+        return PyBool_FromLong(self->value.longdouble_value != 0.0L);  // nonzero test on the long double payload
+    }
 }
 
-template <binop_def binary_op>
+template <binary_op_quad_def sleef_op, binary_op_longdouble_def longdouble_op>
 static PyObject *
 quad_binary_func(PyObject *op1, PyObject *op2)
 {
@@ -44,65 +55,68 @@ quad_binary_func(PyObject *op1, PyObject *op2)
     PyObject *other;
     QuadPrecisionObject *other_quad = NULL;
     int is_forward;
+    QuadBackendType backend;
 
     if (PyObject_TypeCheck(op1, &QuadPrecision_Type)) {
         is_forward = 1;
         self = (QuadPrecisionObject *)op1;
         other = Py_NewRef(op2);
+        backend = self->backend;
     }
     else {
         is_forward = 0;
         self = (QuadPrecisionObject *)op2;
         other = Py_NewRef(op1);
+        backend = self->backend;
     }
 
     if (PyObject_TypeCheck(other, &QuadPrecision_Type)) {
         Py_INCREF(other);
         other_quad = (QuadPrecisionObject *)other;
+        if (other_quad->backend != backend) {  // both operands are QuadPrecision but stored differently: refuse to mix them
+            PyErr_SetString(PyExc_TypeError, "Cannot mix QuadPrecision backends");
+            Py_DECREF(other_quad); Py_DECREF(other);  // two refs are held on this object (Py_NewRef + Py_INCREF above); release both
+            return NULL;
+        }
     }
     else if (PyLong_Check(other) || PyFloat_Check(other)) {
-        other_quad = QuadPrecision_raw_new();
+        other_quad = QuadPrecision_from_object(other, backend);
         if (!other_quad) {
             Py_DECREF(other);
             return NULL;
         }
-
-        if (PyLong_Check(other)) {
-            long long value = PyLong_AsLongLong(other);
-            if (value == -1 && PyErr_Occurred()) {
-                Py_DECREF(other);
-                Py_DECREF(other_quad);
-                return NULL;
-            }
-            other_quad->quad.value = Sleef_cast_from_int64q1(value);
-        }
-        else {
-            double value = PyFloat_AsDouble(other);
-            if (value == -1.0 && PyErr_Occurred()) {
-                Py_DECREF(other);
-                Py_DECREF(other_quad);
-                return NULL;
-            }
-            other_quad->quad.value = Sleef_cast_from_doubleq1(value);
-        }
     }
     else {
         Py_DECREF(other);
         Py_RETURN_NOTIMPLEMENTED;
     }
 
-    QuadPrecisionObject *res = QuadPrecision_raw_new();
+    QuadPrecisionObject *res = QuadPrecision_raw_new(backend);
     if (!res) {
         Py_DECREF(other_quad);
         Py_DECREF(other);
         return NULL;
     }
 
-    if (is_forward) {
-        binary_op(&res->quad.value, &self->quad.value, &other_quad->quad.value);
+    if (backend == BACKEND_SLEEF) {
+        if (is_forward) {
+            sleef_op(&res->value.sleef_value, &self->value.sleef_value,
+                     &other_quad->value.sleef_value);
+        }
+        else {
+            sleef_op(&res->value.sleef_value, &other_quad->value.sleef_value,
+                     &self->value.sleef_value);
+        }
     }
     else {
-        binary_op(&res->quad.value, &other_quad->quad.value, &self->quad.value);
+        if (is_forward) {
+            longdouble_op(&res->value.longdouble_value, &self->value.longdouble_value,
+                          &other_quad->value.longdouble_value);
+        }
+        else {
+            longdouble_op(&res->value.longdouble_value, &other_quad->value.longdouble_value,
+                          &self->value.longdouble_value);
+        }
     }
 
     Py_DECREF(other_quad);
@@ -115,13 +129,20 @@ PyObject *
 quad_richcompare(QuadPrecisionObject *self, PyObject *other, int cmp_op)
 {
     QuadPrecisionObject *other_quad = NULL;
+    QuadBackendType backend = self->backend;
 
     if (PyObject_TypeCheck(other, &QuadPrecision_Type)) {
         Py_INCREF(other);
         other_quad = (QuadPrecisionObject *)other;
+        if (other_quad->backend != backend) {
+            PyErr_SetString(PyExc_TypeError,
+                            "Cannot compare QuadPrecision objects with different backends");
+            Py_DECREF(other_quad);
+            return NULL;
+        }
     }
     else if (PyLong_CheckExact(other) || PyFloat_CheckExact(other)) {
-        other_quad = QuadPrecision_from_object(other);
+        other_quad = QuadPrecision_from_object(other, backend);
         if (other_quad == NULL) {
             return NULL;
         }
@@ -129,29 +150,57 @@ quad_richcompare(QuadPrecisionObject *self, PyObject *other, int cmp_op)
     else {
         Py_RETURN_NOTIMPLEMENTED;
     }
+
     int cmp;
-    switch (cmp_op) {
-        case Py_LT:
-            cmp = Sleef_icmpltq1(self->quad.value, other_quad->quad.value);
-            break;
-        case Py_LE:
-            cmp = Sleef_icmpleq1(self->quad.value, other_quad->quad.value);
-            break;
-        case Py_EQ:
-            cmp = Sleef_icmpeqq1(self->quad.value, other_quad->quad.value);
-            break;
-        case Py_NE:
-            cmp = Sleef_icmpneq1(self->quad.value, other_quad->quad.value);
-            break;
-        case Py_GT:
-            cmp = Sleef_icmpgtq1(self->quad.value, other_quad->quad.value);
-            break;
-        case Py_GE:
-            cmp = Sleef_icmpgeq1(self->quad.value, other_quad->quad.value);
-            break;
-        default:
-            Py_DECREF(other_quad);
-            Py_RETURN_NOTIMPLEMENTED;
+    if (backend == BACKEND_SLEEF) {
+        switch (cmp_op) {
+            case Py_LT:
+                cmp = Sleef_icmpltq1(self->value.sleef_value, other_quad->value.sleef_value);
+                break;
+            case Py_LE:
+                cmp = Sleef_icmpleq1(self->value.sleef_value, other_quad->value.sleef_value);
+                break;
+            case Py_EQ:
+                cmp = Sleef_icmpeqq1(self->value.sleef_value, other_quad->value.sleef_value);
+                break;
+            case Py_NE:
+                cmp = Sleef_icmpneq1(self->value.sleef_value, other_quad->value.sleef_value);
+                break;
+            case Py_GT:
+                cmp = Sleef_icmpgtq1(self->value.sleef_value, other_quad->value.sleef_value);
+                break;
+            case Py_GE:
+                cmp = Sleef_icmpgeq1(self->value.sleef_value, other_quad->value.sleef_value);
+                break;
+            default:
+                Py_DECREF(other_quad);
+                Py_RETURN_NOTIMPLEMENTED;
+        }
+    }
+    else {
+        switch (cmp_op) {
+            case Py_LT:
+                cmp = self->value.longdouble_value < other_quad->value.longdouble_value;
+                break;
+            case Py_LE:
+                cmp = self->value.longdouble_value <= other_quad->value.longdouble_value;
+                break;
+            case Py_EQ:
+                cmp = self->value.longdouble_value == other_quad->value.longdouble_value;
+                break;
+            case Py_NE:
+                cmp = self->value.longdouble_value != other_quad->value.longdouble_value;
+                break;
+            case Py_GT:
+                cmp = self->value.longdouble_value > other_quad->value.longdouble_value;
+                break;
+            case Py_GE:
+                cmp = self->value.longdouble_value >= other_quad->value.longdouble_value;
+                break;
+            default:
+                Py_DECREF(other_quad);
+                Py_RETURN_NOTIMPLEMENTED;
+        }
     }
     Py_DECREF(other_quad);
 
@@ -159,13 +208,13 @@ quad_richcompare(QuadPrecisionObject *self, PyObject *other, int cmp_op)
 }
 
 PyNumberMethods quad_as_scalar = {
-        .nb_add = (binaryfunc)quad_binary_func<quad_add>,
-        .nb_subtract = (binaryfunc)quad_binary_func<quad_sub>,
-        .nb_multiply = (binaryfunc)quad_binary_func<quad_mul>,
-        .nb_power = (ternaryfunc)quad_binary_func<quad_pow>,
-        .nb_negative = (unaryfunc)quad_unary_func<quad_negative>,
-        .nb_positive = (unaryfunc)quad_unary_func<quad_positive>,
-        .nb_absolute = (unaryfunc)quad_unary_func<quad_absolute>,
+        .nb_add = (binaryfunc)quad_binary_func<quad_add, ld_add>,  // each slot names a SLEEF op and its long double twin
+        .nb_subtract = (binaryfunc)quad_binary_func<quad_sub, ld_sub>,
+        .nb_multiply = (binaryfunc)quad_binary_func<quad_mul, ld_mul>,
+        .nb_power = (ternaryfunc)quad_binary_func<quad_pow, ld_pow>,  // NOTE(review): quad_binary_func takes two arguments, so the modulus argument is never examined
+        .nb_negative = (unaryfunc)quad_unary_func<quad_negative, ld_negative>,
+        .nb_positive = (unaryfunc)quad_unary_func<quad_positive, ld_positive>,
+        .nb_absolute = (unaryfunc)quad_unary_func<quad_absolute, ld_absolute>,
         .nb_bool = (inquiry)quad_nonzero,
-        .nb_true_divide = (binaryfunc)quad_binary_func<quad_div>,
+        .nb_true_divide = (binaryfunc)quad_binary_func<quad_div, ld_div>,  // only true division is filled in; no floor-divide or remainder slot is set here
 };
\ No newline at end of file
diff --git a/quaddtype/reinstall.sh b/quaddtype/reinstall.sh
index afec2b59..f552d7a7 100755
--- a/quaddtype/reinstall.sh
+++ b/quaddtype/reinstall.sh
@@ -8,6 +8,6 @@ then
 fi
 
 #meson setup build -Db_sanitize=address,undefined
-python -m pip uninstall -y quaddtype numpy_quaddtype
+python -m pip uninstall -y numpy_quaddtype
 python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v' -Csetup-args="-Dbuildtype=debug"
 #python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v'
\ No newline at end of file

From 7fd8da6627a9be5be350128529e2678188e285cd Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Sun, 1 Sep 2024 21:14:11 +0530
Subject: [PATCH 05/32] failing: longdouble with different dtype

---
 quaddtype/numpy_quaddtype/src/dtype.c         |   8 +-
 quaddtype/numpy_quaddtype/src/dtype.h         |   6 +-
 quaddtype/numpy_quaddtype/src/ops.hpp         |   2 +-
 quaddtype/numpy_quaddtype/src/quad_common.h   |   1 +
 .../numpy_quaddtype/src/quaddtype_main.c      |   8 +-
 quaddtype/numpy_quaddtype/src/umath.cpp       | 323 +++++++++++-------
 quaddtype/reinstall.sh                        |   4 +-
 temp.py                                       | 207 +++++++++++
 8 files changed, 420 insertions(+), 139 deletions(-)
 create mode 100644 temp.py

diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c
index 9e496dd8..b655f112 100644
--- a/quaddtype/numpy_quaddtype/src/dtype.c
+++ b/quaddtype/numpy_quaddtype/src/dtype.c
@@ -57,7 +57,6 @@ new_quaddtype_instance(QuadBackendType backend)
     new->base.elsize = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
     new->base.alignment = (backend == BACKEND_SLEEF) ? _Alignof(Sleef_quad) : _Alignof(long double);
     new->backend = backend;
-
     return new;
 }
 
@@ -73,7 +72,7 @@ common_instance(QuadPrecDTypeObject *dtype1, QuadPrecDTypeObject *dtype2)
 {
     if (dtype1->backend != dtype2->backend) {
         PyErr_SetString(PyExc_TypeError,
-                        "Cannot combine QuadPrecDType instances with different backends");
+                        "Cannot find common instance for QuadPrecDTypes with different backends");
         return NULL;
     }
     Py_INCREF(dtype1);
@@ -189,7 +188,8 @@ QuadPrecDType_new(PyTypeObject *NPY_UNUSED(cls), PyObject *args, PyObject *kwds)
         return NULL;
     }
 
-    return (PyObject *)new_quaddtype_instance(backend);
+    return (PyObject *)quadprec_discover_descriptor_from_pyobject(
+            &QuadPrecDType, (PyObject *)QuadPrecision_raw_new(backend));
 }
 
 static PyObject *
@@ -217,7 +217,7 @@ init_quadprec_dtype(void)
         return -1;
 
     PyArrayDTypeMeta_Spec QuadPrecDType_DTypeSpec = {
-            .flags = NPY_DT_NUMERIC,
+            .flags = NPY_DT_PARAMETRIC | NPY_DT_NUMERIC,
             .casts = casts,
             .typeobj = &QuadPrecision_Type,
             .slots = QuadPrecDType_Slots,
diff --git a/quaddtype/numpy_quaddtype/src/dtype.h b/quaddtype/numpy_quaddtype/src/dtype.h
index 243018d3..77a65449 100644
--- a/quaddtype/numpy_quaddtype/src/dtype.h
+++ b/quaddtype/numpy_quaddtype/src/dtype.h
@@ -17,9 +17,11 @@ typedef struct {
 
 extern PyArray_DTypeMeta QuadPrecDType;
 
-QuadPrecDTypeObject * new_quaddtype_instance(QuadBackendType backend);
+QuadPrecDTypeObject *
+new_quaddtype_instance(QuadBackendType backend);
 
-int init_quadprec_dtype(void);
+int
+init_quadprec_dtype(void);
 
 #ifdef __cplusplus
 }
diff --git a/quaddtype/numpy_quaddtype/src/ops.hpp b/quaddtype/numpy_quaddtype/src/ops.hpp
index 3e290da6..9b3eeed1 100644
--- a/quaddtype/numpy_quaddtype/src/ops.hpp
+++ b/quaddtype/numpy_quaddtype/src/ops.hpp
@@ -376,7 +376,7 @@ quad_greaterequal(const Sleef_quad *a, const Sleef_quad *b)
 }
 
 // comparison quad functions
-typedef npy_bool (*cmp_londouble_def)(const long double *, const double *);
+typedef npy_bool (*cmp_londouble_def)(const long double *, const long double *);
 
 static inline npy_bool
 ld_equal(const long double *a, const long double *b)
diff --git a/quaddtype/numpy_quaddtype/src/quad_common.h b/quaddtype/numpy_quaddtype/src/quad_common.h
index 190573af..bc578a29 100644
--- a/quaddtype/numpy_quaddtype/src/quad_common.h
+++ b/quaddtype/numpy_quaddtype/src/quad_common.h
@@ -6,6 +6,7 @@ extern "C" {
 #endif
 
 typedef enum {
+    BACKEND_INVALID = -1,  // sentinel for "no backend chosen yet"; being -1 keeps BACKEND_SLEEF at 0
     BACKEND_SLEEF,
     BACKEND_LONGDOUBLE
 } QuadBackendType;
diff --git a/quaddtype/numpy_quaddtype/src/quaddtype_main.c b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
index ce263e49..994d1eba 100644
--- a/quaddtype/numpy_quaddtype/src/quaddtype_main.c
+++ b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
@@ -11,7 +11,7 @@
 
 #include "scalar.h"
 #include "dtype.h"
-// #include "umath.h"
+#include "umath.h"
 #include "quad_common.h"
 
 static struct PyModuleDef moduledef = {
@@ -43,9 +43,9 @@ PyInit__quaddtype_main(void)
     if (PyModule_AddObject(m, "QuadPrecDType", (PyObject *)&QuadPrecDType) < 0)
         goto error;
 
-    // if (init_quad_umath() < 0) {
-    //     goto error;
-    // }
+    if (init_quad_umath() < 0) {
+        goto error;
+    }
 
     return m;
 
diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp
index 9007f890..0d64ff7a 100644
--- a/quaddtype/numpy_quaddtype/src/umath.cpp
+++ b/quaddtype/numpy_quaddtype/src/umath.cpp
@@ -1,5 +1,3 @@
-#include "scalar.h"
-
 #define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API
 #define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API
 #define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION
@@ -17,11 +15,40 @@ extern "C" {
 
 #include "numpy/dtype_api.h"
 }
+#include "quad_common.h"
+#include "scalar.h"
 #include "dtype.h"
 #include "umath.h"
 #include "ops.hpp"
 
-template <unary_op_def unary_op>
+static NPY_CASTING
+quad_unary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[],
+                                  PyArray_Descr *const given_descrs[], PyArray_Descr *loop_descrs[],
+                                  npy_intp *NPY_UNUSED(view_offset))
+{
+    Py_INCREF(given_descrs[0]);  // take a new reference before handing the descriptor to loop_descrs
+    loop_descrs[0] = given_descrs[0];  // the input descriptor is used unchanged
+
+    if (given_descrs[1] == NULL) {  // caller supplied no output descriptor
+        Py_INCREF(given_descrs[0]);
+        loop_descrs[1] = given_descrs[0];  // output reuses the input descriptor, and therefore its backend
+    }
+    else {  // caller supplied an output descriptor: keep it as given
+        Py_INCREF(given_descrs[1]);
+        loop_descrs[1] = given_descrs[1];
+    }
+
+    QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)given_descrs[0];
+    QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)loop_descrs[1];
+
+    if (descr_in->backend != descr_out->backend) {  // only reachable when an explicit output descriptor was passed
+        return NPY_UNSAFE_CASTING;  // NOTE(review): loop_descrs[1] keeps the other backend while the unary loop sizes reads and writes from descriptors[0] -- confirm
+    }
+
+    return NPY_NO_CASTING;  // input and output share one backend
+}
+
+template <unary_op_quad_def sleef_op, unary_op_longdouble_def longdouble_op>
 int
 quad_generic_unary_op_strided_loop(PyArrayMethod_Context *context, char *const data[],
                                    npy_intp const dimensions[], npy_intp const strides[],
@@ -33,11 +60,20 @@ quad_generic_unary_op_strided_loop(PyArrayMethod_Context *context, char *const d
     npy_intp in_stride = strides[0];
     npy_intp out_stride = strides[1];
 
-    Sleef_quad in, out;
+    QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];
+    QuadBackendType backend = descr->backend;
+    size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
+
+    quad_value in, out;
     while (N--) {
-        memcpy(&in, in_ptr, sizeof(Sleef_quad));
-        unary_op(&in, &out);
-        memcpy(out_ptr, &out, sizeof(Sleef_quad));
+        memcpy(&in, in_ptr, elem_size);
+        if (backend == BACKEND_SLEEF) {
+            sleef_op(&in.sleef_value, &out.sleef_value);
+        }
+        else {
+            longdouble_op(&in.longdouble_value, &out.longdouble_value);
+        }
+        memcpy(out_ptr, &out, elem_size);
 
         in_ptr += in_stride;
         out_ptr += out_stride;
@@ -45,26 +81,7 @@ quad_generic_unary_op_strided_loop(PyArrayMethod_Context *context, char *const d
     return 0;
 }
 
-static NPY_CASTING
-quad_unary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[],
-                                  PyArray_Descr *const given_descrs[], PyArray_Descr *loop_descrs[],
-                                  npy_intp *NPY_UNUSED(view_offset))
-{
-    Py_INCREF(given_descrs[0]);
-    loop_descrs[0] = given_descrs[0];
-
-    if (given_descrs[1] == NULL) {
-        Py_INCREF(given_descrs[0]);
-        loop_descrs[1] = given_descrs[0];
-        return NPY_NO_CASTING;
-    }
-    Py_INCREF(given_descrs[1]);
-    loop_descrs[1] = given_descrs[1];
-
-    return NPY_NO_CASTING;
-}
-
-template <unary_op_def unary_op>
+template <unary_op_quad_def sleef_op, unary_op_longdouble_def longdouble_op>
 int
 create_quad_unary_ufunc(PyObject *numpy, const char *ufunc_name)
 {
@@ -77,7 +94,8 @@ create_quad_unary_ufunc(PyObject *numpy, const char *ufunc_name)
 
     PyType_Slot slots[] = {
             {NPY_METH_resolve_descriptors, (void *)&quad_unary_op_resolve_descriptors},
-            {NPY_METH_strided_loop, (void *)&quad_generic_unary_op_strided_loop<unary_op>},
+            {NPY_METH_strided_loop,
+             (void *)&quad_generic_unary_op_strided_loop<sleef_op, longdouble_op>},
             {0, NULL}};
 
     PyArrayMethod_Spec Spec = {
@@ -100,46 +118,46 @@ create_quad_unary_ufunc(PyObject *numpy, const char *ufunc_name)
 int
 init_quad_unary_ops(PyObject *numpy)
 {
-    if (create_quad_unary_ufunc<quad_negative>(numpy, "negative") < 0) {
+    if (create_quad_unary_ufunc<quad_negative, ld_negative>(numpy, "negative") < 0) {
         return -1;
     }
-    if (create_quad_unary_ufunc<quad_absolute>(numpy, "absolute") < 0) {
+    if (create_quad_unary_ufunc<quad_absolute, ld_absolute>(numpy, "absolute") < 0) {
         return -1;
     }
-    if (create_quad_unary_ufunc<quad_rint>(numpy, "rint") < 0) {
+    if (create_quad_unary_ufunc<quad_rint, ld_rint>(numpy, "rint") < 0) {
         return -1;
     }
-    if (create_quad_unary_ufunc<quad_trunc>(numpy, "trunc") < 0) {
+    if (create_quad_unary_ufunc<quad_trunc, ld_trunc>(numpy, "trunc") < 0) {
         return -1;
     }
-    if (create_quad_unary_ufunc<quad_floor>(numpy, "floor") < 0) {
+    if (create_quad_unary_ufunc<quad_floor, ld_floor>(numpy, "floor") < 0) {
         return -1;
     }
-    if (create_quad_unary_ufunc<quad_ceil>(numpy, "ceil") < 0) {
+    if (create_quad_unary_ufunc<quad_ceil, ld_ceil>(numpy, "ceil") < 0) {
         return -1;
     }
-    if (create_quad_unary_ufunc<quad_sqrt>(numpy, "sqrt") < 0) {
+    if (create_quad_unary_ufunc<quad_sqrt, ld_sqrt>(numpy, "sqrt") < 0) {
         return -1;
     }
-    if (create_quad_unary_ufunc<quad_square>(numpy, "square") < 0) {
+    if (create_quad_unary_ufunc<quad_square, ld_square>(numpy, "square") < 0) {
         return -1;
     }
-    if (create_quad_unary_ufunc<quad_log>(numpy, "log") < 0) {
+    if (create_quad_unary_ufunc<quad_log, ld_log>(numpy, "log") < 0) {
         return -1;
     }
-    if (create_quad_unary_ufunc<quad_log2>(numpy, "log2") < 0) {
+    if (create_quad_unary_ufunc<quad_log2, ld_log2>(numpy, "log2") < 0) {
         return -1;
     }
-    if (create_quad_unary_ufunc<quad_log10>(numpy, "log10") < 0) {
+    if (create_quad_unary_ufunc<quad_log10, ld_log10>(numpy, "log10") < 0) {
         return -1;
     }
-    if (create_quad_unary_ufunc<quad_log1p>(numpy, "log1p") < 0) {
+    if (create_quad_unary_ufunc<quad_log1p, ld_log1p>(numpy, "log1p") < 0) {
         return -1;
     }
-    if (create_quad_unary_ufunc<quad_exp>(numpy, "exp") < 0) {
+    if (create_quad_unary_ufunc<quad_exp, ld_exp>(numpy, "exp") < 0) {
         return -1;
     }
-    if (create_quad_unary_ufunc<quad_exp2>(numpy, "exp2") < 0) {
+    if (create_quad_unary_ufunc<quad_exp2, ld_exp2>(numpy, "exp2") < 0) {
         return -1;
     }
     return 0;
@@ -147,33 +165,6 @@ init_quad_unary_ops(PyObject *numpy)
 
 // Binary ufuncs
 
-template <binop_def binop>
-int
-quad_generic_binop_strided_loop(PyArrayMethod_Context *context, char *const data[],
-                                npy_intp const dimensions[], npy_intp const strides[],
-                                NpyAuxData *auxdata)
-{
-    npy_intp N = dimensions[0];
-    char *in1_ptr = data[0], *in2_ptr = data[1];
-    char *out_ptr = data[2];
-    npy_intp in1_stride = strides[0];
-    npy_intp in2_stride = strides[1];
-    npy_intp out_stride = strides[2];
-
-    Sleef_quad in1, in2, out;
-    while (N--) {
-        memcpy(&in1, in1_ptr, sizeof(Sleef_quad));
-        memcpy(&in2, in2_ptr, sizeof(Sleef_quad));
-        binop(&out, &in1, &in2);
-        memcpy(out_ptr, &out, sizeof(Sleef_quad));
-
-        in1_ptr += in1_stride;
-        in2_ptr += in2_stride;
-        out_ptr += out_stride;
-    }
-    return 0;
-}
-
 static NPY_CASTING
 quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[],
                                    PyArray_Descr *const given_descrs[],
@@ -184,22 +175,72 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
     Py_INCREF(given_descrs[1]);
     loop_descrs[1] = given_descrs[1];
 
+    QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0];
+    QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1];
+
+    if (descr_in1->backend != descr_in2->backend) {
+        PyErr_SetString(PyExc_TypeError,
+                        "Cannot operate on QuadPrecision objects with different backends");
+        return (NPY_CASTING)-1;
+    }
+
     if (given_descrs[2] == NULL) {
-        PyArray_Descr *out_descr = (PyArray_Descr *)new_quaddtype_instance();
-        if (!out_descr) {
+        loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(descr_in1->backend);
+        if (!loop_descrs[2]) {
             return (NPY_CASTING)-1;
         }
-        Py_INCREF(given_descrs[0]);
-        loop_descrs[2] = out_descr;
     }
     else {
         Py_INCREF(given_descrs[2]);
         loop_descrs[2] = given_descrs[2];
     }
 
+    QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)loop_descrs[2];
+    if (descr_out->backend != descr_in1->backend) {
+        PyErr_SetString(PyExc_TypeError,
+                        "Output QuadPrecision object must have the same backend as inputs");
+        return (NPY_CASTING)-1;
+    }
+
     return NPY_NO_CASTING;
 }
 
+template <binary_op_quad_def sleef_op, binary_op_longdouble_def longdouble_op>
+int
+quad_generic_binop_strided_loop(PyArrayMethod_Context *context, char *const data[],
+                                npy_intp const dimensions[], npy_intp const strides[],
+                                NpyAuxData *auxdata)
+{
+    npy_intp N = dimensions[0];  // number of elements to process
+    char *in1_ptr = data[0], *in2_ptr = data[1];  // data[0] and data[1] are the two inputs
+    char *out_ptr = data[2];  // data[2] is the output
+    npy_intp in1_stride = strides[0];  // strides are added to char pointers, i.e. they are byte steps
+    npy_intp in2_stride = strides[1];
+    npy_intp out_stride = strides[2];
+
+    QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];  // only the first operand's descriptor is consulted
+    QuadBackendType backend = descr->backend;  // quad_binary_op_resolve_descriptors rejects operands whose backends differ
+    size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);  // one size serves both inputs and the output
+
+    quad_value in1, in2, out;  // local scratch values; the ops never dereference the strided pointers directly
+    while (N--) {
+        memcpy(&in1, in1_ptr, elem_size);  // copy in by bytes -- presumably so unaligned buffers are tolerated; confirm
+        memcpy(&in2, in2_ptr, elem_size);
+        if (backend == BACKEND_SLEEF) {
+            sleef_op(&out.sleef_value, &in1.sleef_value, &in2.sleef_value);  // the op's int return value is not checked
+        }
+        else {  // every backend other than SLEEF is treated as long double
+            longdouble_op(&out.longdouble_value, &in1.longdouble_value, &in2.longdouble_value);  // the op's int return value is not checked
+        }
+        memcpy(out_ptr, &out, elem_size);  // copy the result back out by bytes
+
+        in1_ptr += in1_stride;  // step each operand by its own stride
+        in2_ptr += in2_stride;
+        out_ptr += out_stride;
+    }
+    return 0;  // the loop always reports success
+}
+
 // helper debugging function
 static const char *
 get_dtype_name(PyArray_DTypeMeta *dtype)
@@ -258,15 +299,16 @@ static int
 quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
                     PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[])
 {
-    // printf("quad_ufunc_promoter called for ufunc: %s\n", ufunc->name);
-    // printf("Entering quad_ufunc_promoter\n");
-    // printf("Ufunc name: %s\n", ufunc->name);
-    // printf("nin: %d, nargs: %d\n", ufunc->nin, ufunc->nargs);
+    printf("quad_ufunc_promoter called for ufunc: %s\n", ufunc->name);
+    printf("Entering quad_ufunc_promoter\n");
+    printf("Ufunc name: %s\n", ufunc->name);
+    printf("nin: %d, nargs: %d\n", ufunc->nin, ufunc->nargs);
 
     int nin = ufunc->nin;
     int nargs = ufunc->nargs;
     PyArray_DTypeMeta *common = NULL;
     bool has_quad = false;
+    QuadBackendType backend = BACKEND_INVALID;  // Initialize to an invalid state
 
     // Handle the special case for reductions
     if (op_dtypes[0] == NULL) {
@@ -274,55 +316,65 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
         for (int i = 0; i < 3; i++) {
             Py_INCREF(op_dtypes[1]);
             new_op_dtypes[i] = op_dtypes[1];
-            // printf("new_op_dtypes[%d] set to %s\n", i, get_dtype_name(new_op_dtypes[i]));
+            printf("new_op_dtypes[%d] set to %s\n", i, get_dtype_name(new_op_dtypes[i]));
         }
         return 0;
     }
 
     // Check if any input or signature is QuadPrecision
-    for (int i = 0; i < nargs; i++) {
-        if ((i < nin && op_dtypes[i] == &QuadPrecDType) || (signature[i] == &QuadPrecDType)) {
+    for (int i = 0; i < nin; i++) {
+        if (op_dtypes[i] == &QuadPrecDType) {
             has_quad = true;
-            // printf("QuadPrecision detected in input %d or signature\n", i);
+            // todo: Why below line always picking the backend as SLEEF
+            QuadPrecDTypeObject *descr =
+                    (QuadPrecDTypeObject *)PyArray_GetDefaultDescr(op_dtypes[i]);
+
+            const char *s = (descr->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE";
+            printf("QuadPrecision detected in input %d or signature with backend: %s\n", i, s);
+            if (backend == BACKEND_INVALID)
+                backend = descr->backend;
+            else if (backend != BACKEND_INVALID && backend != descr->backend) {
+                PyErr_SetString(PyExc_TypeError,
+                                "Cannot mix QuadPrecDType arrays with different backends");
+                return -1;
+            }
+            Py_DECREF(descr);
             break;
         }
     }
 
     if (has_quad) {
-        // If QuadPrecision is involved, use it for all arguments
         common = &QuadPrecDType;
-        // printf("Using QuadPrecDType as common type\n");
+        printf("Using QuadPrecDType as common type\n");
     }
     else {
-        // Check if output signature is homogeneous
         for (int i = nin; i < nargs; i++) {
             if (signature[i] != NULL) {
                 if (common == NULL) {
                     Py_INCREF(signature[i]);
                     common = signature[i];
-                    // printf("Common type set to %s from signature\n", get_dtype_name(common));
+                    printf("Common type set to %s from signature\n", get_dtype_name(common));
                 }
                 else if (common != signature[i]) {
                     Py_CLEAR(common);  // Not homogeneous, unset common
-                    // printf("Output signature not homogeneous, cleared common type\n");
+                    printf("Output signature not homogeneous, cleared common type\n");
                     break;
                 }
             }
         }
-
-        // If no common output dtype, use standard promotion for inputs
+    }
+    // If no common output dtype, use standard promotion for inputs
+    if (common == NULL) {
+        printf("Using standard promotion for inputs\n");
+        common = PyArray_PromoteDTypeSequence(nin, op_dtypes);
         if (common == NULL) {
-            // printf("Using standard promotion for inputs\n");
-            common = PyArray_PromoteDTypeSequence(nin, op_dtypes);
-            if (common == NULL) {
-                if (PyErr_ExceptionMatches(PyExc_TypeError)) {
-                    PyErr_Clear();  // Do not propagate normal promotion errors
-                }
-                // printf("Exiting quad_ufunc_promoter (promotion failed)\n");
-                return -1;
+            if (PyErr_ExceptionMatches(PyExc_TypeError)) {
+                PyErr_Clear();  // Do not propagate normal promotion errors
             }
-            // printf("Common type after promotion: %s\n", get_dtype_name(common));
+            printf("Exiting quad_ufunc_promoter (promotion failed)\n");
+            return -1;
         }
+        printf("Common type after promotion: %s\n", get_dtype_name(common));
     }
 
     // Set all new_op_dtypes to the common dtype
@@ -331,30 +383,29 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
             // If signature is specified for this argument, use it
             Py_INCREF(signature[i]);
             new_op_dtypes[i] = signature[i];
-            // printf("new_op_dtypes[%d] set to %s (from signature)\n", i,
-            // get_dtype_name(new_op_dtypes[i]));
+            printf("new_op_dtypes[%d] set to %s (from signature)\n", i,
+                   get_dtype_name(new_op_dtypes[i]));
         }
         else {
             // Otherwise, use the common dtype
             Py_INCREF(common);
             new_op_dtypes[i] = common;
-            // printf("new_op_dtypes[%d] set to %s (from common)\n", i,
-            // get_dtype_name(new_op_dtypes[i]));
+            printf("new_op_dtypes[%d] set to %s (from common)\n", i,
+                   get_dtype_name(new_op_dtypes[i]));
         }
     }
 
     Py_XDECREF(common);
-    // printf("Exiting quad_ufunc_promoter\n");
+    printf("Exiting quad_ufunc_promoter\n");
     return 0;
 }
 
-template <binop_def binop>
+template <binary_op_quad_def sleef_op, binary_op_longdouble_def longdouble_op>
 int
 create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name)
 {
     PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name);
     if (ufunc == NULL) {
-        Py_DecRef(ufunc);
         return -1;
     }
 
@@ -362,7 +413,8 @@ create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name)
 
     PyType_Slot slots[] = {
             {NPY_METH_resolve_descriptors, (void *)&quad_binary_op_resolve_descriptors},
-            {NPY_METH_strided_loop, (void *)&quad_generic_binop_strided_loop<binop>},
+            {NPY_METH_strided_loop,
+             (void *)&quad_generic_binop_strided_loop<sleef_op, longdouble_op>},
             {0, NULL}};
 
     PyArrayMethod_Spec Spec = {
@@ -404,28 +456,28 @@ create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name)
 int
 init_quad_binary_ops(PyObject *numpy)
 {
-    if (create_quad_binary_ufunc<quad_add>(numpy, "add") < 0) {
+    if (create_quad_binary_ufunc<quad_add, ld_add>(numpy, "add") < 0) {
         return -1;
     }
-    if (create_quad_binary_ufunc<quad_sub>(numpy, "subtract") < 0) {
+    if (create_quad_binary_ufunc<quad_sub, ld_sub>(numpy, "subtract") < 0) {
         return -1;
     }
-    if (create_quad_binary_ufunc<quad_mul>(numpy, "multiply") < 0) {
+    if (create_quad_binary_ufunc<quad_mul, ld_mul>(numpy, "multiply") < 0) {
         return -1;
     }
-    if (create_quad_binary_ufunc<quad_div>(numpy, "divide") < 0) {
+    if (create_quad_binary_ufunc<quad_div, ld_div>(numpy, "divide") < 0) {
         return -1;
     }
-    if (create_quad_binary_ufunc<quad_pow>(numpy, "power") < 0) {
+    if (create_quad_binary_ufunc<quad_pow, ld_pow>(numpy, "power") < 0) {
         return -1;
     }
-    if (create_quad_binary_ufunc<quad_mod>(numpy, "mod") < 0) {
+    if (create_quad_binary_ufunc<quad_mod, ld_mod>(numpy, "mod") < 0) {
         return -1;
     }
-    if (create_quad_binary_ufunc<quad_minimum>(numpy, "minimum") < 0) {
+    if (create_quad_binary_ufunc<quad_minimum, ld_minimum>(numpy, "minimum") < 0) {
         return -1;
     }
-    if (create_quad_binary_ufunc<quad_maximum>(numpy, "maximum") < 0) {
+    if (create_quad_binary_ufunc<quad_maximum, ld_maximum>(numpy, "maximum") < 0) {
         return -1;
     }
     return 0;
@@ -433,7 +485,7 @@ init_quad_binary_ops(PyObject *numpy)
 
 // comparison functions
 
-template <cmp_def comp>
+template <cmp_quad_def sleef_comp, cmp_londouble_def ld_comp>
 int
 quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[],
                                npy_intp const dimensions[], npy_intp const strides[],
@@ -446,8 +498,19 @@ quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[
     npy_intp in2_stride = strides[1];
     npy_intp out_stride = strides[2];
 
+    QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];
+    QuadBackendType backend = descr->backend;
+    size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
+
     while (N--) {
-        *((npy_bool *)out_ptr) = comp((Sleef_quad *)in1_ptr, (Sleef_quad *)in2_ptr);
+        if (backend == BACKEND_SLEEF) {
+            *((npy_bool *)out_ptr) =
+                    sleef_comp((const Sleef_quad *)in1_ptr, (const Sleef_quad *)in2_ptr);
+        }
+        else {
+            *((npy_bool *)out_ptr) =
+                    ld_comp((const long double *)in1_ptr, (const long double *)in2_ptr);
+        }
 
         in1_ptr += in1_stride;
         in2_ptr += in2_stride;
@@ -472,7 +535,7 @@ comparison_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
     return 0;
 }
 
-template <cmp_def comp>
+template <cmp_quad_def sleef_comp, cmp_londouble_def ld_comp>
 int
 create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name)
 {
@@ -483,8 +546,10 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name)
 
     PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &PyArray_BoolDType};
 
-    PyType_Slot slots[] = {{NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop<comp>},
-                           {0, NULL}};
+    PyType_Slot slots[] = {
+            {NPY_METH_resolve_descriptors, (void *)&quad_binary_op_resolve_descriptors},
+            {NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop<sleef_comp, ld_comp>},
+            {0, NULL}};
 
     PyArrayMethod_Spec Spec = {
             .name = "quad_comp",
@@ -501,7 +566,7 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name)
     }
 
     PyObject *promoter_capsule =
-            PyCapsule_New((void *)&comparison_ufunc_promoter, "numpy._ufunc_promoter", NULL);
+            PyCapsule_New((void *)&quad_ufunc_promoter, "numpy._ufunc_promoter", NULL);
     if (promoter_capsule == NULL) {
         return -1;
     }
@@ -526,22 +591,23 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name)
 int
 init_quad_comps(PyObject *numpy)
 {
-    if (create_quad_comparison_ufunc<quad_equal>(numpy, "equal") < 0) {
+    if (create_quad_comparison_ufunc<quad_equal, ld_equal>(numpy, "equal") < 0) {
         return -1;
     }
-    if (create_quad_comparison_ufunc<quad_notequal>(numpy, "not_equal") < 0) {
+    if (create_quad_comparison_ufunc<quad_notequal, ld_notequal>(numpy, "not_equal") < 0) {
         return -1;
     }
-    if (create_quad_comparison_ufunc<quad_less>(numpy, "less") < 0) {
+    if (create_quad_comparison_ufunc<quad_less, ld_less>(numpy, "less") < 0) {
         return -1;
     }
-    if (create_quad_comparison_ufunc<quad_lessequal>(numpy, "less_equal") < 0) {
+    if (create_quad_comparison_ufunc<quad_lessequal, ld_lessequal>(numpy, "less_equal") < 0) {
         return -1;
     }
-    if (create_quad_comparison_ufunc<quad_greater>(numpy, "greater") < 0) {
+    if (create_quad_comparison_ufunc<quad_greater, ld_greater>(numpy, "greater") < 0) {
         return -1;
     }
-    if (create_quad_comparison_ufunc<quad_greaterequal>(numpy, "greater_equal") < 0) {
+    if (create_quad_comparison_ufunc<quad_greaterequal, ld_greaterequal>(numpy, "greater_equal") <
+        0) {
         return -1;
     }
 
@@ -552,18 +618,23 @@ int
 init_quad_umath(void)
 {
     PyObject *numpy = PyImport_ImportModule("numpy");
-    if (!numpy)
+    if (!numpy) {
+        PyErr_SetString(PyExc_ImportError, "Failed to import numpy module");
         return -1;
+    }
 
     if (init_quad_unary_ops(numpy) < 0) {
+        PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad unary operations");
         goto err;
     }
 
     if (init_quad_binary_ops(numpy) < 0) {
+        PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad binary operations");
         goto err;
     }
 
     if (init_quad_comps(numpy) < 0) {
+        PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad comparison operations");
         goto err;
     }
 
diff --git a/quaddtype/reinstall.sh b/quaddtype/reinstall.sh
index f552d7a7..5131f15a 100755
--- a/quaddtype/reinstall.sh
+++ b/quaddtype/reinstall.sh
@@ -9,5 +9,5 @@ fi
 
 #meson setup build -Db_sanitize=address,undefined
 python -m pip uninstall -y numpy_quaddtype
-python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v' -Csetup-args="-Dbuildtype=debug"
-#python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v'
\ No newline at end of file
+# python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v' -Csetup-args="-Dbuildtype=debug"
+python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v'
\ No newline at end of file
diff --git a/temp.py b/temp.py
new file mode 100644
index 00000000..6ce4fa9f
--- /dev/null
+++ b/temp.py
@@ -0,0 +1,207 @@
+import numpy_quaddtype as npq
+import numpy as np
+
+
+def test_scalar_ops(backend):
+    print(f"\nTesting scalar operations for {backend} backend:")
+
+    # Create QuadPrecision instances
+    q1 = npq.QuadPrecision(
+        "3.14159265358979323846264338327950288", backend=backend)
+    q2 = npq.QuadPrecision(
+        "-2.71828182845904523536028747135266250", backend=backend)
+
+    # Test unary operations
+    print("\nUnary operations:")
+    print(f"  Negation of q1: {-q1}")
+    print(f"  Absolute value of q2: {abs(q2)}")
+
+    # Test binary operations
+    print("\nBinary operations:")
+    print(f"  Addition: {q1 + q2}")
+    print(f"  Subtraction: {q1 - q2}")
+    print(f"  Multiplication: {q1 * q2}")
+    print(f"  Division: {q1 / q2}")
+
+    # Test comparison operations
+    print("\nComparison operations:")
+    print(f"  q1 == q2: {q1 == q2}")
+    print(f"  q1 != q2: {q1 != q2}")
+    print(f"  q1 < q2: {q1 < q2}")
+    print(f"  q1 <= q2: {q1 <= q2}")
+    print(f"  q1 > q2: {q1 > q2}")
+    print(f"  q1 >= q2: {q1 >= q2}")
+
+    # Test operations with Python numbers
+    print("\nOperations with Python numbers:")
+    print(f"  q1 + 1: {q1 + 1}")
+    print(f"  q1 - 2.5: {q1 - 2.5}")
+    print(f"  q1 * 3: {q1 * 3}")
+    print(f"  q1 / 2: {q1 / 2}")
+
+    # Test boolean conversion
+    print("\nBoolean conversion:")
+    print(f"  bool(q1): {bool(q1)}")
+    print(
+        f"  bool(npq.QuadPrecision('0', backend=backend)): {bool(npq.QuadPrecision('0', backend=backend))}")
+
+
+def test_casting(backend):
+    print(f"\nTesting {backend} backend:")
+
+    # Create QuadPrecision instances
+    q1 = npq.QuadPrecision(
+        "3.14159265358979323846264338327950288", backend=backend)
+    q2 = npq.QuadPrecision(
+        "-2.71828182845904523536028747135266250", backend=backend)
+
+    # Test casting from QuadPrecision to numpy dtypes
+    print("Casting from QuadPrecision to numpy dtypes:")
+    print(f"  float32: {np.float32(q1)}")
+    print(f"  float64: {np.float64(q1)}")
+    print(f"  int64: {np.int64(q1)}")
+    print(f"  uint64: {np.uint64(q1)}")
+
+    # Test casting from numpy dtypes to QuadPrecision
+    print("\nCasting from numpy dtypes to QuadPrecision:")
+    print(
+        f"  float32: {np.float32(3.14159).astype(npq.QuadPrecDType(backend=backend))}")
+    print(
+        f"  float64: {np.float64(2.71828182845904).astype(npq.QuadPrecDType(backend=backend))}")
+    print(
+        f"  int64: {np.int64(-1234567890).astype(npq.QuadPrecDType(backend=backend))}")
+    print(
+        f"  uint64: {np.uint64(9876543210).astype(npq.QuadPrecDType(backend=backend))}")
+
+    # Test array operations
+    print("\nArray operations:")
+    q_array = np.array([q1, q2], dtype=npq.QuadPrecDType(backend=backend))
+    print(f"  QuadPrecision array: {q_array}")
+
+    np_array = np.array([3.14, -2.71, 1.41, -1.73], dtype=np.float64)
+    q_from_np = np_array.astype(npq.QuadPrecDType(backend=backend))
+    print(f"  Numpy to QuadPrecision: {q_from_np}")
+
+    back_to_np = np.array(q_from_np, dtype=np.float64)
+    print(f"  QuadPrecision to Numpy: {back_to_np}")
+
+    # Test precision maintenance
+    large_int = 12345678901234567890
+    q_large = np.array([large_int], dtype=np.uint64).astype(
+        npq.QuadPrecDType(backend=backend))[0]
+    print(f"\nPrecision test:")
+    print(f"  Original large int: {large_int}")
+    print(f"  QuadPrecision: {q_large}")
+    print(f"  Back to int: {np.int64(q_large)}")
+
+    # Test edge cases
+
+
+def test_edge_cases(backend):
+    print(f"\nTesting negative numbers for {backend} backend:")
+
+    # Test various negative numbers
+    test_values = [
+        -1.0,
+        -1e10,
+        -1e100,
+        -1e300,
+        np.nextafter(np.finfo(np.float64).min, 0),
+        np.finfo(np.float64).min
+    ]
+
+    for value in test_values:
+        q_value = npq.QuadPrecision(str(value), backend=backend)
+        print(f"  Original: {value}")
+        print(f"  QuadPrecision: {q_value}")
+        print(f"  Back to float64: {np.float64(q_value)}")
+        print()
+
+    # Test value beyond float64 precision
+    beyond_float64_precision = "1.7976931348623157081452742373170435e+308"
+    q_beyond = npq.QuadPrecision(beyond_float64_precision, backend=backend)
+    print(f"  Beyond float64 precision: {q_beyond}")
+    q_float64_max = npq.QuadPrecision(
+        str(np.finfo(np.float64).max), backend=backend)
+    diff = q_beyond - q_float64_max
+    print(f"  Difference from float64 max: {diff}")
+    print(
+        f"  Difference is positive: {diff > npq.QuadPrecision('0', backend=backend)}")
+
+    # Test epsilon (smallest representable difference between two numbers)
+    q_epsilon = npq.QuadPrecision(
+        str(np.finfo(np.float64).eps), backend=backend)
+    print(f"  Float64 epsilon in QuadPrecision: {q_epsilon}")
+    q_one = npq.QuadPrecision("1", backend=backend)
+    q_one_plus_epsilon = q_one + q_epsilon
+    print(f"  1 + epsilon != 1: {q_one_plus_epsilon != q_one}")
+    print(f"  (1 + epsilon) - 1: {q_one_plus_epsilon - q_one}")
+
+
+def test_ufuncs(backend):
+    print(f"\nTesting ufuncs for {backend} backend:")
+
+    # Create QuadPrecision arrays
+    q_array1 = np.array([1, 2, 3], dtype=npq.QuadPrecDType(backend=backend))
+    q_array2 = np.array([1, 2, 3], dtype=npq.QuadPrecDType(backend=backend))
+
+    # Test unary ufuncs
+    print("\nUnary ufuncs:")
+    print(f"  negative: {np.negative(q_array1)}")
+    print(f"  absolute: {np.absolute(q_array1)}")
+    print(f"  rint: {np.rint(q_array1)}")
+    print(f"  floor: {np.floor(q_array1)}")
+    print(f"  ceil: {np.ceil(q_array1)}")
+    print(f"  trunc: {np.trunc(q_array1)}")
+    print(f"  sqrt: {np.sqrt(q_array1)}")
+    print(f"  square: {np.square(q_array1)}")
+    print(f"  log: {np.log(q_array1)}")
+    print(f"  log2: {np.log2(q_array1)}")
+    print(f"  log10: {np.log10(q_array1)}")
+    print(f"  exp: {np.exp(q_array1)}")
+    print(f"  exp2: {np.exp2(q_array1)}")
+
+    # Test binary ufuncs
+    print("\nBinary ufuncs:")
+    print(f"  add: {np.add(q_array1, q_array2)}")
+    print(f"  subtract: {np.subtract(q_array1, q_array2)}")
+    print(f"  multiply: {np.multiply(q_array1, q_array2)}")
+    print(f"  divide: {np.divide(q_array1, q_array2)}")
+    print(f"  power: {np.power(q_array1, q_array2)}")
+    print(f"  mod: {np.mod(q_array1, q_array2)}")
+    print(f"  minimum: {np.minimum(q_array1, q_array2)}")
+    print(f"  maximum: {np.maximum(q_array1, q_array2)}")
+
+    # Test comparison ufuncs
+    print("\nComparison ufuncs:")
+    print(f"  equal: {np.equal(q_array1, q_array2)}")
+    print(f"  not_equal: {np.not_equal(q_array1, q_array2)}")
+    print(f"  less: {np.less(q_array1, q_array2)}")
+    print(f"  less_equal: {np.less_equal(q_array1, q_array2)}")
+    print(f"  greater: {np.greater(q_array1, q_array2)}")
+    print(f"  greater_equal: {np.greater_equal(q_array1, q_array2)}")
+
+    # Test mixed operations with numpy arrays
+    print(f"Testing backend: {backend}")
+    print("\nMixed operations with numpy arrays:")
+    np_array = np.array([1.0, 2.0, 3.0], dtype=np.float64)
+    print(f"  add: {np.add(q_array1, np_array)}")
+    print(f"  multiply: {np.multiply(q_array1, np_array)}")
+    print(f"  divide: {np.divide(q_array1, np_array)}")
+
+    # Test reduction operations
+    print("\nReduction operations:")
+    print(f"  sum: {np.sum(q_array1)}")
+    print(f"  prod: {np.prod(q_array1)}")
+    print(f"  min: {np.min(q_array1)}")
+    print(f"  max: {np.max(q_array1)}")
+
+
+# Run tests for both backends
+for backend in ['longdouble']:
+    test_scalar_ops(backend)
+    test_casting(backend)
+    test_edge_cases(backend)
+    test_ufuncs(backend)
+
+print("All tests completed successfully")

From d664a598858032a54ba2fa1c66724f22bafefec2 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Mon, 2 Sep 2024 00:23:12 +0530
Subject: [PATCH 06/32] added backend parameter in default descriptor

---
 quaddtype/numpy_quaddtype/src/dtype.c   |   7 +-
 quaddtype/numpy_quaddtype/src/umath.cpp | 113 +++++++++++++-----------
 2 files changed, 64 insertions(+), 56 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c
index b655f112..c58f8633 100644
--- a/quaddtype/numpy_quaddtype/src/dtype.c
+++ b/quaddtype/numpy_quaddtype/src/dtype.c
@@ -154,9 +154,12 @@ quadprec_getitem(QuadPrecDTypeObject *descr, char *dataptr)
 }
 
 static PyArray_Descr *
-quadprec_default_descr(PyArray_DTypeMeta *NPY_UNUSED(cls))
+quadprec_default_descr(PyArray_DTypeMeta *cls)
 {
-    return (PyArray_Descr *)new_quaddtype_instance(BACKEND_SLEEF);
+    QuadPrecDTypeObject *temp = (QuadPrecDTypeObject *)cls;
+    const char *s1 = (temp->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE";
+    printf("called with backend: %s\n", s1);
+    return (PyArray_Descr *)new_quaddtype_instance(temp->backend);
 }
 
 static PyType_Slot QuadPrecDType_Slots[] = {
diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp
index 0d64ff7a..783f38a2 100644
--- a/quaddtype/numpy_quaddtype/src/umath.cpp
+++ b/quaddtype/numpy_quaddtype/src/umath.cpp
@@ -21,6 +21,60 @@ extern "C" {
 #include "umath.h"
 #include "ops.hpp"
 
+// helper debugging function
+static const char *
+get_dtype_name(PyArray_DTypeMeta *dtype)
+{
+    if (dtype == &QuadPrecDType) {
+        return "QuadPrecDType";
+    }
+    else if (dtype == &PyArray_BoolDType) {
+        return "BoolDType";
+    }
+    else if (dtype == &PyArray_ByteDType) {
+        return "ByteDType";
+    }
+    else if (dtype == &PyArray_UByteDType) {
+        return "UByteDType";
+    }
+    else if (dtype == &PyArray_ShortDType) {
+        return "ShortDType";
+    }
+    else if (dtype == &PyArray_UShortDType) {
+        return "UShortDType";
+    }
+    else if (dtype == &PyArray_IntDType) {
+        return "IntDType";
+    }
+    else if (dtype == &PyArray_UIntDType) {
+        return "UIntDType";
+    }
+    else if (dtype == &PyArray_LongDType) {
+        return "LongDType";
+    }
+    else if (dtype == &PyArray_ULongDType) {
+        return "ULongDType";
+    }
+    else if (dtype == &PyArray_LongLongDType) {
+        return "LongLongDType";
+    }
+    else if (dtype == &PyArray_ULongLongDType) {
+        return "ULongLongDType";
+    }
+    else if (dtype == &PyArray_FloatDType) {
+        return "FloatDType";
+    }
+    else if (dtype == &PyArray_DoubleDType) {
+        return "DoubleDType";
+    }
+    else if (dtype == &PyArray_LongDoubleDType) {
+        return "LongDoubleDType";
+    }
+    else {
+        return "UnknownDType";
+    }
+}
+
 static NPY_CASTING
 quad_unary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[],
                                   PyArray_Descr *const given_descrs[], PyArray_Descr *loop_descrs[],
@@ -170,6 +224,7 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
                                    PyArray_Descr *const given_descrs[],
                                    PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED(view_offset))
 {
+    printf("Descriptor Resolver is calledn\n");
     Py_INCREF(given_descrs[0]);
     loop_descrs[0] = given_descrs[0];
     Py_INCREF(given_descrs[1]);
@@ -177,6 +232,10 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
 
     QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0];
     QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1];
+    const char *s1 = (descr_in1->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE";
+    const char *s2 = (descr_in2->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE";
+    printf("1: %s\n", s1);
+    printf("2: %s\n", s2);
 
     if (descr_in1->backend != descr_in2->backend) {
         PyErr_SetString(PyExc_TypeError,
@@ -241,60 +300,6 @@ quad_generic_binop_strided_loop(PyArrayMethod_Context *context, char *const data
     return 0;
 }
 
-// helper debugging function
-static const char *
-get_dtype_name(PyArray_DTypeMeta *dtype)
-{
-    if (dtype == &QuadPrecDType) {
-        return "QuadPrecDType";
-    }
-    else if (dtype == &PyArray_BoolDType) {
-        return "BoolDType";
-    }
-    else if (dtype == &PyArray_ByteDType) {
-        return "ByteDType";
-    }
-    else if (dtype == &PyArray_UByteDType) {
-        return "UByteDType";
-    }
-    else if (dtype == &PyArray_ShortDType) {
-        return "ShortDType";
-    }
-    else if (dtype == &PyArray_UShortDType) {
-        return "UShortDType";
-    }
-    else if (dtype == &PyArray_IntDType) {
-        return "IntDType";
-    }
-    else if (dtype == &PyArray_UIntDType) {
-        return "UIntDType";
-    }
-    else if (dtype == &PyArray_LongDType) {
-        return "LongDType";
-    }
-    else if (dtype == &PyArray_ULongDType) {
-        return "ULongDType";
-    }
-    else if (dtype == &PyArray_LongLongDType) {
-        return "LongLongDType";
-    }
-    else if (dtype == &PyArray_ULongLongDType) {
-        return "ULongLongDType";
-    }
-    else if (dtype == &PyArray_FloatDType) {
-        return "FloatDType";
-    }
-    else if (dtype == &PyArray_DoubleDType) {
-        return "DoubleDType";
-    }
-    else if (dtype == &PyArray_LongDoubleDType) {
-        return "LongDoubleDType";
-    }
-    else {
-        return "UnknownDType";
-    }
-}
-
 static int
 quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
                     PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[])

From 7a85fbf154d778787fa04725d3c7bd5005d1e1d8 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Mon, 2 Sep 2024 00:24:29 +0530
Subject: [PATCH 07/32] removed todo comment: umath.cpp

---
 quaddtype/numpy_quaddtype/src/umath.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp
index 783f38a2..fc996f95 100644
--- a/quaddtype/numpy_quaddtype/src/umath.cpp
+++ b/quaddtype/numpy_quaddtype/src/umath.cpp
@@ -330,7 +330,6 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
     for (int i = 0; i < nin; i++) {
         if (op_dtypes[i] == &QuadPrecDType) {
             has_quad = true;
-            // todo: Why below line always picking the backend as SLEEF
             QuadPrecDTypeObject *descr =
                     (QuadPrecDTypeObject *)PyArray_GetDefaultDescr(op_dtypes[i]);
 

From e8cba6a3f0d108ae015253a9164fa0763d0870ca Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Thu, 5 Sep 2024 14:58:47 +0530
Subject: [PATCH 08/32] temporary solution to handle both backends

---
 quaddtype/numpy_quaddtype/src/casts.cpp |  6 ++
 quaddtype/numpy_quaddtype/src/dtype.c   | 21 +++++--
 quaddtype/numpy_quaddtype/src/umath.cpp | 84 ++++++++++++++-----------
 3 files changed, 70 insertions(+), 41 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp
index 473fd9af..6fdf1825 100644
--- a/quaddtype/numpy_quaddtype/src/casts.cpp
+++ b/quaddtype/numpy_quaddtype/src/casts.cpp
@@ -272,6 +272,7 @@ numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta
                                   PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2],
                                   npy_intp *view_offset)
 {
+    printf("cast.cpp: numpy_to_quad_resolve_descriptors is called\n");
     if (given_descrs[1] == NULL) {
         loop_descrs[1] = (PyArray_Descr *)new_quaddtype_instance(BACKEND_SLEEF);
         if (loop_descrs[1] == nullptr) {
@@ -279,6 +280,7 @@ numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta
         }
     }
     else {
+        printf("cast.cpp: numpy_to_quad_resolve_descriptors, I am in ELSE condition\n");
         Py_INCREF(given_descrs[1]);
         loop_descrs[1] = given_descrs[1];
     }
@@ -297,8 +299,12 @@ numpy_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[],
     char *in_ptr = data[0];
     char *out_ptr = data[1];
 
+    QuadPrecDTypeObject *descr_out1 = (QuadPrecDTypeObject *)context->descriptors[0];
+    printf("The type of context->descriptor[0] is: %s\n", Py_TYPE(descr_out1)->tp_name);
     QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1];
+    printf("The type of context->descriptor[1] is: %s\n", Py_TYPE(descr_out)->tp_name);
     QuadBackendType backend = descr_out->backend;
+    printf("cast.cpp: numpy_to_quad_strided_loop with backend: %d\n", backend);
     size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
 
     while (N--) {
diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c
index c58f8633..35b7a36e 100644
--- a/quaddtype/numpy_quaddtype/src/dtype.c
+++ b/quaddtype/numpy_quaddtype/src/dtype.c
@@ -49,6 +49,13 @@ quad_store(char *data_ptr, void *x, QuadBackendType backend)
 QuadPrecDTypeObject *
 new_quaddtype_instance(QuadBackendType backend)
 {
+    // if (backend != BACKEND_SLEEF && backend != BACKEND_LONGDOUBLE)
+    // {
+    //     PyErr_SetString(PyExc_TypeError,
+    //                     "Backend must be sleef or longdouble");
+    //     return NULL;
+    // }
+    printf("New Quandtype instance is created with backend: %d\n", backend);
     QuadPrecDTypeObject *new = (QuadPrecDTypeObject *)PyArrayDescr_Type.tp_new(
             (PyTypeObject *)&QuadPrecDType, NULL, NULL);
     if (new == NULL) {
@@ -63,6 +70,7 @@ new_quaddtype_instance(QuadBackendType backend)
 static QuadPrecDTypeObject *
 ensure_canonical(QuadPrecDTypeObject *self)
 {
+    printf("Ensure Canonical is called\n");
     Py_INCREF(self);
     return self;
 }
@@ -70,6 +78,7 @@ ensure_canonical(QuadPrecDTypeObject *self)
 static QuadPrecDTypeObject *
 common_instance(QuadPrecDTypeObject *dtype1, QuadPrecDTypeObject *dtype2)
 {
+    printf("Common Instance is called\n");
     if (dtype1->backend != dtype2->backend) {
         PyErr_SetString(PyExc_TypeError,
                         "Cannot find common instance for QuadPrecDTypes with different backends");
@@ -82,6 +91,7 @@ common_instance(QuadPrecDTypeObject *dtype1, QuadPrecDTypeObject *dtype2)
 static PyArray_DTypeMeta *
 common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other)
 {
+    printf("Common dtype is called\n");
     // Promote integer and floating-point types to QuadPrecDType
     if (other->type_num >= 0 &&
         (PyTypeNum_ISINTEGER(other->type_num) || PyTypeNum_ISFLOAT(other->type_num))) {
@@ -105,7 +115,9 @@ quadprec_discover_descriptor_from_pyobject(PyArray_DTypeMeta *NPY_UNUSED(cls), P
         PyErr_SetString(PyExc_TypeError, "Can only store QuadPrecision in a QuadPrecDType array.");
         return NULL;
     }
+    
     QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)obj;
+    printf("dtype.c: quadprec_discover_descriptor_from_pyobject is called with backend %d\n", quad_obj->backend);
     return (PyArray_Descr *)new_quaddtype_instance(quad_obj->backend);
 }
 
@@ -156,10 +168,11 @@ quadprec_getitem(QuadPrecDTypeObject *descr, char *dataptr)
 static PyArray_Descr *
 quadprec_default_descr(PyArray_DTypeMeta *cls)
 {
-    QuadPrecDTypeObject *temp = (QuadPrecDTypeObject *)cls;
-    const char *s1 = (temp->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE";
-    printf("called with backend: %s\n", s1);
-    return (PyArray_Descr *)new_quaddtype_instance(temp->backend);
+    QuadPrecDTypeObject * a = (QuadPrecDTypeObject *)cls;
+     printf("Default descriptor called with backend: %d\n", a->backend);
+    QuadPrecDTypeObject * temp = new_quaddtype_instance(a->backend);
+    printf("Default descriptor made backend: %d\n", temp->backend);
+    return (PyArray_Descr *)temp;
 }
 
 static PyType_Slot QuadPrecDType_Slots[] = {
diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp
index fc996f95..2adabc1f 100644
--- a/quaddtype/numpy_quaddtype/src/umath.cpp
+++ b/quaddtype/numpy_quaddtype/src/umath.cpp
@@ -175,6 +175,9 @@ init_quad_unary_ops(PyObject *numpy)
     if (create_quad_unary_ufunc<quad_negative, ld_negative>(numpy, "negative") < 0) {
         return -1;
     }
+    if (create_quad_unary_ufunc<quad_positive, ld_positive>(numpy, "positive") < 0) {
+        return -1;
+    }
     if (create_quad_unary_ufunc<quad_absolute, ld_absolute>(numpy, "absolute") < 0) {
         return -1;
     }
@@ -224,44 +227,62 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
                                    PyArray_Descr *const given_descrs[],
                                    PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED(view_offset))
 {
-    printf("Descriptor Resolver is calledn\n");
-    Py_INCREF(given_descrs[0]);
-    loop_descrs[0] = given_descrs[0];
-    Py_INCREF(given_descrs[1]);
-    loop_descrs[1] = given_descrs[1];
+    printf("Descriptor Resolver is called\n");
 
     QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0];
     QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1];
+    QuadBackendType target_backend;
+
     const char *s1 = (descr_in1->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE";
     const char *s2 = (descr_in2->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE";
-    printf("1: %s\n", s1);
-    printf("2: %s\n", s2);
+    printf("1: %s   %d  %s\n", s1, descr_in1->backend, Py_TYPE(given_descrs[0])->tp_name);
+    printf("2: %s   %d  %s\n", s2, descr_in2->backend, Py_TYPE(given_descrs[1])->tp_name);
 
+    // Determine target backend and if casting is needed
+    NPY_CASTING casting = NPY_NO_CASTING;
     if (descr_in1->backend != descr_in2->backend) {
-        PyErr_SetString(PyExc_TypeError,
-                        "Cannot operate on QuadPrecision objects with different backends");
-        return (NPY_CASTING)-1;
+        target_backend = BACKEND_LONGDOUBLE;
+        casting = NPY_SAFE_CASTING;
+        printf("Different backends detected. Casting to LONGDOUBLE.\n");
+    } else {
+        target_backend = descr_in1->backend;
+        printf("Unified backend: %s\n", (target_backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE");
+    }
+
+    // Set up input descriptors, casting if necessary
+    for (int i = 0; i < 2; i++) {
+        if (((QuadPrecDTypeObject *)given_descrs[i])->backend != target_backend) {
+            loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance(target_backend);
+            if (!loop_descrs[i]) {
+                return (NPY_CASTING)-1;
+            }
+        } else {
+            Py_INCREF(given_descrs[i]);
+            loop_descrs[i] = given_descrs[i];
+        }
     }
 
+    // Set up output descriptor
     if (given_descrs[2] == NULL) {
-        loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(descr_in1->backend);
+        loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(target_backend);
         if (!loop_descrs[2]) {
             return (NPY_CASTING)-1;
         }
-    }
-    else {
-        Py_INCREF(given_descrs[2]);
-        loop_descrs[2] = given_descrs[2];
-    }
-
-    QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)loop_descrs[2];
-    if (descr_out->backend != descr_in1->backend) {
-        PyErr_SetString(PyExc_TypeError,
-                        "Output QuadPrecision object must have the same backend as inputs");
-        return (NPY_CASTING)-1;
+    } else {
+        QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)given_descrs[2];
+        if (descr_out->backend != target_backend) {
+            loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(target_backend);
+            if (!loop_descrs[2]) {
+                return (NPY_CASTING)-1;
+            }
+        } else {
+            Py_INCREF(given_descrs[2]);
+            loop_descrs[2] = given_descrs[2];
+        }
     }
 
-    return NPY_NO_CASTING;
+    printf("Casting result: %d\n", casting);
+    return casting;
 }
 
 template <binary_op_quad_def sleef_op, binary_op_longdouble_def longdouble_op>
@@ -270,6 +291,7 @@ quad_generic_binop_strided_loop(PyArrayMethod_Context *context, char *const data
                                 npy_intp const dimensions[], npy_intp const strides[],
                                 NpyAuxData *auxdata)
 {
+    printf("Umath: Generic Strided loop is calledn\n");
     npy_intp N = dimensions[0];
     char *in1_ptr = data[0], *in2_ptr = data[1];
     char *out_ptr = data[2];
@@ -328,22 +350,10 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
 
     // Check if any input or signature is QuadPrecision
     for (int i = 0; i < nin; i++) {
+        printf("iterating on dtype : %s\n", get_dtype_name(op_dtypes[i]));
         if (op_dtypes[i] == &QuadPrecDType) {
             has_quad = true;
-            QuadPrecDTypeObject *descr =
-                    (QuadPrecDTypeObject *)PyArray_GetDefaultDescr(op_dtypes[i]);
-
-            const char *s = (descr->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE";
-            printf("QuadPrecision detected in input %d or signature with backend: %s\n", i, s);
-            if (backend == BACKEND_INVALID)
-                backend = descr->backend;
-            else if (backend != BACKEND_INVALID && backend != descr->backend) {
-                PyErr_SetString(PyExc_TypeError,
-                                "Cannot mix QuadPrecDType arrays with different backends");
-                return -1;
-            }
-            Py_DECREF(descr);
-            break;
+            printf("QuadPrecision detected in input %d\n", i);
         }
     }
 

From 01e9b7ee9864cea1e80796b5cd7414372473590f Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Fri, 6 Sep 2024 12:04:01 +0530
Subject: [PATCH 09/32] post-cleaning

---
 quaddtype/meson.build                         |  56 -----
 quaddtype/numpy_quaddtype/__init__.py         |   5 +-
 quaddtype/numpy_quaddtype/src/casts.cpp       |   8 +-
 quaddtype/numpy_quaddtype/src/dtype.c         |  27 +--
 .../numpy_quaddtype/src/quaddtype_main.c      |  17 ++
 quaddtype/numpy_quaddtype/src/umath.cpp       |  35 +--
 temp.py                                       | 207 ------------------
 7 files changed, 43 insertions(+), 312 deletions(-)
 delete mode 100644 quaddtype/meson.build
 delete mode 100644 temp.py

diff --git a/quaddtype/meson.build b/quaddtype/meson.build
deleted file mode 100644
index 1b9f6248..00000000
--- a/quaddtype/meson.build
+++ /dev/null
@@ -1,56 +0,0 @@
-project('numpy_quaddtype', 'c', 'cpp', default_options : ['cpp_std=c++17', 'b_pie=true'])
-
-py_mod = import('python')
-py = py_mod.find_installation()
-
-c = meson.get_compiler('c')
-
-sleef_dep = c.find_library('sleef')
-sleefquad_dep = c.find_library('sleefquad')
-
-incdir_numpy = run_command(py,
-  [
-    '-c',
-    'import numpy; import os; print(os.path.relpath(numpy.get_include()))'
-  ],
-  check: true
-).stdout().strip()
-
-includes = include_directories(
-    [
-        incdir_numpy,
-        'numpy_quaddtype/src',
-    ]
-)
-
-srcs = [
-    'numpy_quaddtype/src/casts.h',
-    'numpy_quaddtype/src/casts.cpp',
-    'numpy_quaddtype/src/scalar.h',
-    'numpy_quaddtype/src/scalar.c',
-    'numpy_quaddtype/src/dtype.h',
-    'numpy_quaddtype/src/dtype.c',
-    'numpy_quaddtype/src/quaddtype_main.c',
-    'numpy_quaddtype/src/scalar_ops.h',
-    'numpy_quaddtype/src/scalar_ops.cpp',
-    'numpy_quaddtype/src/ops.hpp',
-    # 'numpy_quaddtype/src/umath.h',
-    # 'numpy_quaddtype/src/umath.cpp'
-]
-
-py.install_sources(
-    [
-        'numpy_quaddtype/__init__.py',
-    ],
-    subdir: 'numpy_quaddtype',
-    pure: false
-)
-
-py.extension_module('_quaddtype_main',
-srcs,
-c_args: ['-g', '-O0', '-lsleef', '-lsleefquad'],
-dependencies: [sleef_dep, sleefquad_dep],
-install: true,
-subdir: 'numpy_quaddtype',
-include_directories: includes
-)
\ No newline at end of file
diff --git a/quaddtype/numpy_quaddtype/__init__.py b/quaddtype/numpy_quaddtype/__init__.py
index 5ee5ab30..5aa50441 100644
--- a/quaddtype/numpy_quaddtype/__init__.py
+++ b/quaddtype/numpy_quaddtype/__init__.py
@@ -1,10 +1,11 @@
 from ._quaddtype_main import (
     QuadPrecision,
-    QuadPrecDType
+    QuadPrecDType,
+    is_longdouble_128
 )
 
 __all__ = ['QuadPrecision', 'QuadPrecDType', 'SleefQuadPrecision', 'LongDoubleQuadPrecision',
-           'SleefQuadPrecDType', 'LongDoubleQuadPrecDType']
+           'SleefQuadPrecDType', 'LongDoubleQuadPrecDType', 'is_longdouble_128']
 
 
 def SleefQuadPrecision(value):
diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp
index 6fdf1825..61809012 100644
--- a/quaddtype/numpy_quaddtype/src/casts.cpp
+++ b/quaddtype/numpy_quaddtype/src/casts.cpp
@@ -272,7 +272,7 @@ numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta
                                   PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2],
                                   npy_intp *view_offset)
 {
-    printf("cast.cpp: numpy_to_quad_resolve_descriptors is called\n");
+
     if (given_descrs[1] == NULL) {
         loop_descrs[1] = (PyArray_Descr *)new_quaddtype_instance(BACKEND_SLEEF);
         if (loop_descrs[1] == nullptr) {
@@ -280,7 +280,7 @@ numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta
         }
     }
     else {
-        printf("cast.cpp: numpy_to_quad_resolve_descriptors, I am in ELSE condition\n");
+
         Py_INCREF(given_descrs[1]);
         loop_descrs[1] = given_descrs[1];
     }
@@ -299,12 +299,8 @@ numpy_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[],
     char *in_ptr = data[0];
     char *out_ptr = data[1];
 
-    QuadPrecDTypeObject *descr_out1 = (QuadPrecDTypeObject *)context->descriptors[0];
-    printf("The type of context->descriptor[0] is: %s\n", Py_TYPE(descr_out1)->tp_name);
     QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1];
-    printf("The type of context->descriptor[1] is: %s\n", Py_TYPE(descr_out)->tp_name);
     QuadBackendType backend = descr_out->backend;
-    printf("cast.cpp: numpy_to_quad_strided_loop with backend: %d\n", backend);
     size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
 
     while (N--) {
diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c
index 35b7a36e..36bca565 100644
--- a/quaddtype/numpy_quaddtype/src/dtype.c
+++ b/quaddtype/numpy_quaddtype/src/dtype.c
@@ -49,13 +49,13 @@ quad_store(char *data_ptr, void *x, QuadBackendType backend)
 QuadPrecDTypeObject *
 new_quaddtype_instance(QuadBackendType backend)
 {
-    // if (backend != BACKEND_SLEEF && backend != BACKEND_LONGDOUBLE)
-    // {
-    //     PyErr_SetString(PyExc_TypeError,
-    //                     "Backend must be sleef or longdouble");
-    //     return NULL;
-    // }
-    printf("New Quandtype instance is created with backend: %d\n", backend);
+    if (backend != BACKEND_SLEEF && backend != BACKEND_LONGDOUBLE)
+    {
+        PyErr_SetString(PyExc_TypeError,
+                        "Backend must be sleef or longdouble");
+        return NULL;
+    }
+
     QuadPrecDTypeObject *new = (QuadPrecDTypeObject *)PyArrayDescr_Type.tp_new(
             (PyTypeObject *)&QuadPrecDType, NULL, NULL);
     if (new == NULL) {
@@ -70,7 +70,7 @@ new_quaddtype_instance(QuadBackendType backend)
 static QuadPrecDTypeObject *
 ensure_canonical(QuadPrecDTypeObject *self)
 {
-    printf("Ensure Canonical is called\n");
+
     Py_INCREF(self);
     return self;
 }
@@ -78,7 +78,7 @@ ensure_canonical(QuadPrecDTypeObject *self)
 static QuadPrecDTypeObject *
 common_instance(QuadPrecDTypeObject *dtype1, QuadPrecDTypeObject *dtype2)
 {
-    printf("Common Instance is called\n");
+
     if (dtype1->backend != dtype2->backend) {
         PyErr_SetString(PyExc_TypeError,
                         "Cannot find common instance for QuadPrecDTypes with different backends");
@@ -91,7 +91,7 @@ common_instance(QuadPrecDTypeObject *dtype1, QuadPrecDTypeObject *dtype2)
 static PyArray_DTypeMeta *
 common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other)
 {
-    printf("Common dtype is called\n");
+
     // Promote integer and floating-point types to QuadPrecDType
     if (other->type_num >= 0 &&
         (PyTypeNum_ISINTEGER(other->type_num) || PyTypeNum_ISFLOAT(other->type_num))) {
@@ -117,7 +117,7 @@ quadprec_discover_descriptor_from_pyobject(PyArray_DTypeMeta *NPY_UNUSED(cls), P
     }
     
     QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)obj;
-    printf("dtype.c: quadprec_discover_descriptor_from_pyobject is called with backend %d\n", quad_obj->backend);
+
     return (PyArray_Descr *)new_quaddtype_instance(quad_obj->backend);
 }
 
@@ -168,10 +168,7 @@ quadprec_getitem(QuadPrecDTypeObject *descr, char *dataptr)
 static PyArray_Descr *
 quadprec_default_descr(PyArray_DTypeMeta *cls)
 {
-    QuadPrecDTypeObject * a = (QuadPrecDTypeObject *)cls;
-     printf("Default descriptor called with backend: %d\n", a->backend);
-    QuadPrecDTypeObject * temp = new_quaddtype_instance(a->backend);
-    printf("Default descriptor made backend: %d\n", temp->backend);
+    QuadPrecDTypeObject * temp = new_quaddtype_instance(BACKEND_SLEEF);
     return (PyArray_Descr *)temp;
 }
 
diff --git a/quaddtype/numpy_quaddtype/src/quaddtype_main.c b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
index 994d1eba..89ec16f0 100644
--- a/quaddtype/numpy_quaddtype/src/quaddtype_main.c
+++ b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
@@ -13,12 +13,29 @@
 #include "dtype.h"
 #include "umath.h"
 #include "quad_common.h"
+#include "float.h"
+
+static PyObject* py_is_longdouble_128(PyObject* self, PyObject* args) {
+    if(sizeof(long double) == 16 && 
+        LDBL_MANT_DIG == 113 && 
+        LDBL_MAX_EXP == 16384) {
+        Py_RETURN_TRUE;
+    } else {
+        Py_RETURN_FALSE;
+    }
+}
+
+static PyMethodDef module_methods[] = {
+    {"is_longdouble_128", py_is_longdouble_128, METH_NOARGS, "Check if long double is 128-bit"},
+    {NULL, NULL, 0, NULL} 
+};
 
 static struct PyModuleDef moduledef = {
         PyModuleDef_HEAD_INIT,
         .m_name = "_quaddtype_main",
         .m_doc = "Quad (128-bit) floating point Data Type for NumPy with multiple backends",
         .m_size = -1,
+        .m_methods = module_methods
 };
 
 PyMODINIT_FUNC
diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp
index 2adabc1f..2444047c 100644
--- a/quaddtype/numpy_quaddtype/src/umath.cpp
+++ b/quaddtype/numpy_quaddtype/src/umath.cpp
@@ -227,7 +227,6 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
                                    PyArray_Descr *const given_descrs[],
                                    PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED(view_offset))
 {
-    printf("Descriptor Resolver is called\n");
 
     QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0];
     QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1];
@@ -235,18 +234,14 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
 
     const char *s1 = (descr_in1->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE";
     const char *s2 = (descr_in2->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE";
-    printf("1: %s   %d  %s\n", s1, descr_in1->backend, Py_TYPE(given_descrs[0])->tp_name);
-    printf("2: %s   %d  %s\n", s2, descr_in2->backend, Py_TYPE(given_descrs[1])->tp_name);
 
     // Determine target backend and if casting is needed
     NPY_CASTING casting = NPY_NO_CASTING;
     if (descr_in1->backend != descr_in2->backend) {
         target_backend = BACKEND_LONGDOUBLE;
         casting = NPY_SAFE_CASTING;
-        printf("Different backends detected. Casting to LONGDOUBLE.\n");
     } else {
         target_backend = descr_in1->backend;
-        printf("Unified backend: %s\n", (target_backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE");
     }
 
     // Set up input descriptors, casting if necessary
@@ -280,8 +275,6 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
             loop_descrs[2] = given_descrs[2];
         }
     }
-
-    printf("Casting result: %d\n", casting);
     return casting;
 }
 
@@ -291,7 +284,6 @@ quad_generic_binop_strided_loop(PyArrayMethod_Context *context, char *const data
                                 npy_intp const dimensions[], npy_intp const strides[],
                                 NpyAuxData *auxdata)
 {
-    printf("Umath: Generic Strided loop is calledn\n");
     npy_intp N = dimensions[0];
     char *in1_ptr = data[0], *in2_ptr = data[1];
     char *out_ptr = data[2];
@@ -326,10 +318,6 @@ static int
 quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
                     PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[])
 {
-    printf("quad_ufunc_promoter called for ufunc: %s\n", ufunc->name);
-    printf("Entering quad_ufunc_promoter\n");
-    printf("Ufunc name: %s\n", ufunc->name);
-    printf("nin: %d, nargs: %d\n", ufunc->nin, ufunc->nargs);
 
     int nin = ufunc->nin;
     int nargs = ufunc->nargs;
@@ -343,23 +331,23 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
         for (int i = 0; i < 3; i++) {
             Py_INCREF(op_dtypes[1]);
             new_op_dtypes[i] = op_dtypes[1];
-            printf("new_op_dtypes[%d] set to %s\n", i, get_dtype_name(new_op_dtypes[i]));
+
         }
         return 0;
     }
 
     // Check if any input or signature is QuadPrecision
     for (int i = 0; i < nin; i++) {
-        printf("iterating on dtype : %s\n", get_dtype_name(op_dtypes[i]));
+
         if (op_dtypes[i] == &QuadPrecDType) {
             has_quad = true;
-            printf("QuadPrecision detected in input %d\n", i);
+
         }
     }
 
     if (has_quad) {
         common = &QuadPrecDType;
-        printf("Using QuadPrecDType as common type\n");
+
     }
     else {
         for (int i = nin; i < nargs; i++) {
@@ -367,11 +355,11 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
                 if (common == NULL) {
                     Py_INCREF(signature[i]);
                     common = signature[i];
-                    printf("Common type set to %s from signature\n", get_dtype_name(common));
+
                 }
                 else if (common != signature[i]) {
                     Py_CLEAR(common);  // Not homogeneous, unset common
-                    printf("Output signature not homogeneous, cleared common type\n");
+
                     break;
                 }
             }
@@ -379,16 +367,15 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
     }
     // If no common output dtype, use standard promotion for inputs
     if (common == NULL) {
-        printf("Using standard promotion for inputs\n");
         common = PyArray_PromoteDTypeSequence(nin, op_dtypes);
         if (common == NULL) {
             if (PyErr_ExceptionMatches(PyExc_TypeError)) {
                 PyErr_Clear();  // Do not propagate normal promotion errors
             }
-            printf("Exiting quad_ufunc_promoter (promotion failed)\n");
+
             return -1;
         }
-        printf("Common type after promotion: %s\n", get_dtype_name(common));
+
     }
 
     // Set all new_op_dtypes to the common dtype
@@ -397,20 +384,16 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
             // If signature is specified for this argument, use it
             Py_INCREF(signature[i]);
             new_op_dtypes[i] = signature[i];
-            printf("new_op_dtypes[%d] set to %s (from signature)\n", i,
-                   get_dtype_name(new_op_dtypes[i]));
         }
         else {
             // Otherwise, use the common dtype
             Py_INCREF(common);
             new_op_dtypes[i] = common;
-            printf("new_op_dtypes[%d] set to %s (from common)\n", i,
-                   get_dtype_name(new_op_dtypes[i]));
         }
     }
 
     Py_XDECREF(common);
-    printf("Exiting quad_ufunc_promoter\n");
+
     return 0;
 }
 
diff --git a/temp.py b/temp.py
deleted file mode 100644
index 6ce4fa9f..00000000
--- a/temp.py
+++ /dev/null
@@ -1,207 +0,0 @@
-import numpy_quaddtype as npq
-import numpy as np
-
-
-def test_scalar_ops(backend):
-    print(f"\nTesting scalar operations for {backend} backend:")
-
-    # Create QuadPrecision instances
-    q1 = npq.QuadPrecision(
-        "3.14159265358979323846264338327950288", backend=backend)
-    q2 = npq.QuadPrecision(
-        "-2.71828182845904523536028747135266250", backend=backend)
-
-    # Test unary operations
-    print("\nUnary operations:")
-    print(f"  Negation of q1: {-q1}")
-    print(f"  Absolute value of q2: {abs(q2)}")
-
-    # Test binary operations
-    print("\nBinary operations:")
-    print(f"  Addition: {q1 + q2}")
-    print(f"  Subtraction: {q1 - q2}")
-    print(f"  Multiplication: {q1 * q2}")
-    print(f"  Division: {q1 / q2}")
-
-    # Test comparison operations
-    print("\nComparison operations:")
-    print(f"  q1 == q2: {q1 == q2}")
-    print(f"  q1 != q2: {q1 != q2}")
-    print(f"  q1 < q2: {q1 < q2}")
-    print(f"  q1 <= q2: {q1 <= q2}")
-    print(f"  q1 > q2: {q1 > q2}")
-    print(f"  q1 >= q2: {q1 >= q2}")
-
-    # Test operations with Python numbers
-    print("\nOperations with Python numbers:")
-    print(f"  q1 + 1: {q1 + 1}")
-    print(f"  q1 - 2.5: {q1 - 2.5}")
-    print(f"  q1 * 3: {q1 * 3}")
-    print(f"  q1 / 2: {q1 / 2}")
-
-    # Test boolean conversion
-    print("\nBoolean conversion:")
-    print(f"  bool(q1): {bool(q1)}")
-    print(
-        f"  bool(npq.QuadPrecision('0', backend=backend)): {bool(npq.QuadPrecision('0', backend=backend))}")
-
-
-def test_casting(backend):
-    print(f"\nTesting {backend} backend:")
-
-    # Create QuadPrecision instances
-    q1 = npq.QuadPrecision(
-        "3.14159265358979323846264338327950288", backend=backend)
-    q2 = npq.QuadPrecision(
-        "-2.71828182845904523536028747135266250", backend=backend)
-
-    # Test casting from QuadPrecision to numpy dtypes
-    print("Casting from QuadPrecision to numpy dtypes:")
-    print(f"  float32: {np.float32(q1)}")
-    print(f"  float64: {np.float64(q1)}")
-    print(f"  int64: {np.int64(q1)}")
-    print(f"  uint64: {np.uint64(q1)}")
-
-    # Test casting from numpy dtypes to QuadPrecision
-    print("\nCasting from numpy dtypes to QuadPrecision:")
-    print(
-        f"  float32: {np.float32(3.14159).astype(npq.QuadPrecDType(backend=backend))}")
-    print(
-        f"  float64: {np.float64(2.71828182845904).astype(npq.QuadPrecDType(backend=backend))}")
-    print(
-        f"  int64: {np.int64(-1234567890).astype(npq.QuadPrecDType(backend=backend))}")
-    print(
-        f"  uint64: {np.uint64(9876543210).astype(npq.QuadPrecDType(backend=backend))}")
-
-    # Test array operations
-    print("\nArray operations:")
-    q_array = np.array([q1, q2], dtype=npq.QuadPrecDType(backend=backend))
-    print(f"  QuadPrecision array: {q_array}")
-
-    np_array = np.array([3.14, -2.71, 1.41, -1.73], dtype=np.float64)
-    q_from_np = np_array.astype(npq.QuadPrecDType(backend=backend))
-    print(f"  Numpy to QuadPrecision: {q_from_np}")
-
-    back_to_np = np.array(q_from_np, dtype=np.float64)
-    print(f"  QuadPrecision to Numpy: {back_to_np}")
-
-    # Test precision maintenance
-    large_int = 12345678901234567890
-    q_large = np.array([large_int], dtype=np.uint64).astype(
-        npq.QuadPrecDType(backend=backend))[0]
-    print(f"\nPrecision test:")
-    print(f"  Original large int: {large_int}")
-    print(f"  QuadPrecision: {q_large}")
-    print(f"  Back to int: {np.int64(q_large)}")
-
-    # Test edge cases
-
-
-def test_edge_cases(backend):
-    print(f"\nTesting negative numbers for {backend} backend:")
-
-    # Test various negative numbers
-    test_values = [
-        -1.0,
-        -1e10,
-        -1e100,
-        -1e300,
-        np.nextafter(np.finfo(np.float64).min, 0),
-        np.finfo(np.float64).min
-    ]
-
-    for value in test_values:
-        q_value = npq.QuadPrecision(str(value), backend=backend)
-        print(f"  Original: {value}")
-        print(f"  QuadPrecision: {q_value}")
-        print(f"  Back to float64: {np.float64(q_value)}")
-        print()
-
-    # Test value beyond float64 precision
-    beyond_float64_precision = "1.7976931348623157081452742373170435e+308"
-    q_beyond = npq.QuadPrecision(beyond_float64_precision, backend=backend)
-    print(f"  Beyond float64 precision: {q_beyond}")
-    q_float64_max = npq.QuadPrecision(
-        str(np.finfo(np.float64).max), backend=backend)
-    diff = q_beyond - q_float64_max
-    print(f"  Difference from float64 max: {diff}")
-    print(
-        f"  Difference is positive: {diff > npq.QuadPrecision('0', backend=backend)}")
-
-    # Test epsilon (smallest representable difference between two numbers)
-    q_epsilon = npq.QuadPrecision(
-        str(np.finfo(np.float64).eps), backend=backend)
-    print(f"  Float64 epsilon in QuadPrecision: {q_epsilon}")
-    q_one = npq.QuadPrecision("1", backend=backend)
-    q_one_plus_epsilon = q_one + q_epsilon
-    print(f"  1 + epsilon != 1: {q_one_plus_epsilon != q_one}")
-    print(f"  (1 + epsilon) - 1: {q_one_plus_epsilon - q_one}")
-
-
-def test_ufuncs(backend):
-    print(f"\nTesting ufuncs for {backend} backend:")
-
-    # Create QuadPrecision arrays
-    q_array1 = np.array([1, 2, 3], dtype=npq.QuadPrecDType(backend=backend))
-    q_array2 = np.array([1, 2, 3], dtype=npq.QuadPrecDType(backend=backend))
-
-    # Test unary ufuncs
-    print("\nUnary ufuncs:")
-    print(f"  negative: {np.negative(q_array1)}")
-    print(f"  absolute: {np.absolute(q_array1)}")
-    print(f"  rint: {np.rint(q_array1)}")
-    print(f"  floor: {np.floor(q_array1)}")
-    print(f"  ceil: {np.ceil(q_array1)}")
-    print(f"  trunc: {np.trunc(q_array1)}")
-    print(f"  sqrt: {np.sqrt(q_array1)}")
-    print(f"  square: {np.square(q_array1)}")
-    print(f"  log: {np.log(q_array1)}")
-    print(f"  log2: {np.log2(q_array1)}")
-    print(f"  log10: {np.log10(q_array1)}")
-    print(f"  exp: {np.exp(q_array1)}")
-    print(f"  exp2: {np.exp2(q_array1)}")
-
-    # Test binary ufuncs
-    print("\nBinary ufuncs:")
-    print(f"  add: {np.add(q_array1, q_array2)}")
-    print(f"  subtract: {np.subtract(q_array1, q_array2)}")
-    print(f"  multiply: {np.multiply(q_array1, q_array2)}")
-    print(f"  divide: {np.divide(q_array1, q_array2)}")
-    print(f"  power: {np.power(q_array1, q_array2)}")
-    print(f"  mod: {np.mod(q_array1, q_array2)}")
-    print(f"  minimum: {np.minimum(q_array1, q_array2)}")
-    print(f"  maximum: {np.maximum(q_array1, q_array2)}")
-
-    # Test comparison ufuncs
-    print("\nComparison ufuncs:")
-    print(f"  equal: {np.equal(q_array1, q_array2)}")
-    print(f"  not_equal: {np.not_equal(q_array1, q_array2)}")
-    print(f"  less: {np.less(q_array1, q_array2)}")
-    print(f"  less_equal: {np.less_equal(q_array1, q_array2)}")
-    print(f"  greater: {np.greater(q_array1, q_array2)}")
-    print(f"  greater_equal: {np.greater_equal(q_array1, q_array2)}")
-
-    # Test mixed operations with numpy arrays
-    print(f"Testing backend: {backend}")
-    print("\nMixed operations with numpy arrays:")
-    np_array = np.array([1.0, 2.0, 3.0], dtype=np.float64)
-    print(f"  add: {np.add(q_array1, np_array)}")
-    print(f"  multiply: {np.multiply(q_array1, np_array)}")
-    print(f"  divide: {np.divide(q_array1, np_array)}")
-
-    # Test reduction operations
-    print("\nReduction operations:")
-    print(f"  sum: {np.sum(q_array1)}")
-    print(f"  prod: {np.prod(q_array1)}")
-    print(f"  min: {np.min(q_array1)}")
-    print(f"  max: {np.max(q_array1)}")
-
-
-# Run tests for both backends
-for backend in ['longdouble']:
-    test_scalar_ops(backend)
-    test_casting(backend)
-    test_edge_cases(backend)
-    test_ufuncs(backend)
-
-print("All tests completed successfully")

From 22982a43f361e6513c7f0e6fd2b81a2250ce58a0 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Fri, 6 Sep 2024 19:09:12 +0530
Subject: [PATCH 10/32] adding dragon4

---
 quaddtype/numpy_quaddtype/src/dragon4.c       | 2057 +++++++++++++++++
 quaddtype/numpy_quaddtype/src/dragon4.h       |   70 +
 quaddtype/numpy_quaddtype/src/dtype.c         |   12 +-
 .../numpy_quaddtype/src/quaddtype_main.c      |    1 +
 quaddtype/numpy_quaddtype/src/scalar.c        |   61 +-
 quaddtype/numpy_quaddtype/src/scalar.h        |    3 +
 quaddtype/numpy_quaddtype/src/umath.cpp       |    4 -
 7 files changed, 2201 insertions(+), 7 deletions(-)
 create mode 100644 quaddtype/numpy_quaddtype/src/dragon4.c
 create mode 100644 quaddtype/numpy_quaddtype/src/dragon4.h

diff --git a/quaddtype/numpy_quaddtype/src/dragon4.c b/quaddtype/numpy_quaddtype/src/dragon4.c
new file mode 100644
index 00000000..313ff312
--- /dev/null
+++ b/quaddtype/numpy_quaddtype/src/dragon4.c
@@ -0,0 +1,2057 @@
+#include <numpy/npy_common.h>
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <sleef.h>
+#include <sleefquad.h>
+
+#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API
+#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API
+#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION
+#define NPY_TARGET_VERSION NPY_2_0_API_VERSION
+#define NO_IMPORT_ARRAY
+#define NO_IMPORT_UFUNC
+
+#include "dragon4.h"
+#include "dtype.h"
+#include "scalar.h"
+
+#if 0
+#define DEBUG_ASSERT(stmnt) assert(stmnt)
+#else
+#define DEBUG_ASSERT(stmnt) do {} while(0)
+#endif
+
+#define c_BigInt_MaxBlocks  1023
+#define BIGINT_DRAGON4_GROUPSIZE 7
+
+typedef struct BigInt {
+    npy_uint32 length;  /* number of blocks in use; 0 represents zero */
+    npy_uint32 blocks[c_BigInt_MaxBlocks];  /* 32-bit digits, low first */
+} BigInt;
+
+typedef struct {
+    BigInt bigints[BIGINT_DRAGON4_GROUPSIZE];  /* presumably work values */
+    char repr[16384];  /* presumably the output text - TODO confirm users */
+} Dragon4_Scratch;
+
+static NPY_TLS Dragon4_Scratch _bigint_static;
+
+static inline npy_uint64
+bitmask_u64(npy_uint32 n)
+{
+    return (((npy_uint64)1) << n) - 1;  /* low n bits set; needs n < 64 */
+}
+
+static inline npy_uint32
+bitmask_u32(npy_uint32 n)
+{
+    return (((npy_uint32)1) << n) - 1;  /* low n bits set; needs n < 32 */
+}
+
+/* result = result * 10 (in place) */
+static void
+BigInt_Multiply10(BigInt *result)
+{
+    /* running carry out of each 32-bit block (at most 9) */
+    npy_uint64 carry = 0;
+    npy_uint32 idx;
+
+    for (idx = 0; idx < result->length; ++idx) {
+        npy_uint64 scaled = (npy_uint64)result->blocks[idx] * 10ull + carry;
+        result->blocks[idx] = (npy_uint32)(scaled & bitmask_u64(32));
+        carry = scaled >> 32;
+    }
+
+    /* a leftover carry becomes a new most-significant block */
+    if (carry == 0) {
+        return;
+    }
+    DEBUG_ASSERT(result->length + 1 <= c_BigInt_MaxBlocks);
+    result->blocks[result->length] = (npy_uint32)carry;
+    ++result->length;
+}
+
+static npy_uint32 g_PowerOf10_U32[] =
+{
+    1,          /* 10 ^ 0 */
+    10,         /* 10 ^ 1 */
+    100,        /* 10 ^ 2 */
+    1000,       /* 10 ^ 3 */
+    10000,      /* 10 ^ 4 */
+    100000,     /* 10 ^ 5 */
+    1000000,    /* 10 ^ 6 */
+    10000000,   /* 10 ^ 7 */
+};
+
+/*
+ * Note: This has a lot of wasted space in the big integer structures of the
+ *       early table entries. It wouldn't be terribly hard to make the multiply
+ *       function work on integer pointers with an array length instead of
+ *       the BigInt struct which would allow us to store a minimal amount of
+ *       data here.
+ */
+static BigInt g_PowerOf10_Big[] =
+{
+    /* 10 ^ 8 */
+    { 1, { 100000000 } },
+    /* 10 ^ 16 */
+    { 2, { 0x6fc10000, 0x002386f2 } },
+    /* 10 ^ 32 */
+    { 4, { 0x00000000, 0x85acef81, 0x2d6d415b, 0x000004ee, } },
+    /* 10 ^ 64 */
+    { 7, { 0x00000000, 0x00000000, 0xbf6a1f01, 0x6e38ed64, 0xdaa797ed,
+           0xe93ff9f4, 0x00184f03, } },
+    /* 10 ^ 128 */
+    { 14, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2e953e01,
+            0x03df9909, 0x0f1538fd, 0x2374e42f, 0xd3cff5ec, 0xc404dc08,
+            0xbccdb0da, 0xa6337f19, 0xe91f2603, 0x0000024e, } },
+    /* 10 ^ 256 */
+    { 27, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+            0x00000000, 0x00000000, 0x00000000, 0x982e7c01, 0xbed3875b,
+            0xd8d99f72, 0x12152f87, 0x6bde50c6, 0xcf4a6e70, 0xd595d80f,
+            0x26b2716e, 0xadc666b0, 0x1d153624, 0x3c42d35a, 0x63ff540e,
+            0xcc5573c0, 0x65f9ef17, 0x55bc28f2, 0x80dcc7f7, 0xf46eeddc,
+            0x5fdcefce, 0x000553f7, } },
+    /* 10 ^ 512 */
+    { 54, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+            0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+            0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+            0x00000000, 0xfc6cf801, 0x77f27267, 0x8f9546dc, 0x5d96976f,
+            0xb83a8a97, 0xc31e1ad9, 0x46c40513, 0x94e65747, 0xc88976c1,
+            0x4475b579, 0x28f8733b, 0xaa1da1bf, 0x703ed321, 0x1e25cfea,
+            0xb21a2f22, 0xbc51fb2e, 0x96e14f5d, 0xbfa3edac, 0x329c57ae,
+            0xe7fc7153, 0xc3fc0695, 0x85a91924, 0xf95f635e, 0xb2908ee0,
+            0x93abade4, 0x1366732a, 0x9449775c, 0x69be5b0e, 0x7343afac,
+            0xb099bc81, 0x45a71d46, 0xa2699748, 0x8cb07303, 0x8a0b1f13,
+            0x8cab8a97, 0xc1d238d9, 0x633415d4, 0x0000001c, } },
+    /* 10 ^ 1024 */
+    { 107, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x2919f001, 0xf55b2b72, 0x6e7c215b,
+             0x1ec29f86, 0x991c4e87, 0x15c51a88, 0x140ac535, 0x4c7d1e1a,
+             0xcc2cd819, 0x0ed1440e, 0x896634ee, 0x7de16cfb, 0x1e43f61f,
+             0x9fce837d, 0x231d2b9c, 0x233e55c7, 0x65dc60d7, 0xf451218b,
+             0x1c5cd134, 0xc9635986, 0x922bbb9f, 0xa7e89431, 0x9f9f2a07,
+             0x62be695a, 0x8e1042c4, 0x045b7a74, 0x1abe1de3, 0x8ad822a5,
+             0xba34c411, 0xd814b505, 0xbf3fdeb3, 0x8fc51a16, 0xb1b896bc,
+             0xf56deeec, 0x31fb6bfd, 0xb6f4654b, 0x101a3616, 0x6b7595fb,
+             0xdc1a47fe, 0x80d98089, 0x80bda5a5, 0x9a202882, 0x31eb0f66,
+             0xfc8f1f90, 0x976a3310, 0xe26a7b7e, 0xdf68368a, 0x3ce3a0b8,
+             0x8e4262ce, 0x75a351a2, 0x6cb0b6c9, 0x44597583, 0x31b5653f,
+             0xc356e38a, 0x35faaba6, 0x0190fba0, 0x9fc4ed52, 0x88bc491b,
+             0x1640114a, 0x005b8041, 0xf4f3235e, 0x1e8d4649, 0x36a8de06,
+             0x73c55349, 0xa7e6bd2a, 0xc1a6970c, 0x47187094, 0xd2db49ef,
+             0x926c3f5b, 0xae6209d4, 0x2d433949, 0x34f4a3c6, 0xd4305d94,
+             0xd9d61a05, 0x00000325, } },
+    /* 10 ^ 2048 */
+    { 213, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1333e001,
+             0xe3096865, 0xb27d4d3f, 0x49e28dcf, 0xec2e4721, 0xee87e354,
+             0xb6067584, 0x368b8abb, 0xa5e5a191, 0x2ed56d55, 0xfd827773,
+             0xea50d142, 0x51b78db2, 0x98342c9e, 0xc850dabc, 0x866ed6f1,
+             0x19342c12, 0x92794987, 0xd2f869c2, 0x66912e4a, 0x71c7fd8f,
+             0x57a7842d, 0x235552eb, 0xfb7fedcc, 0xf3861ce0, 0x38209ce1,
+             0x9713b449, 0x34c10134, 0x8c6c54de, 0xa7a8289c, 0x2dbb6643,
+             0xe3cb64f3, 0x8074ff01, 0xe3892ee9, 0x10c17f94, 0xa8f16f92,
+             0xa8281ed6, 0x967abbb3, 0x5a151440, 0x9952fbed, 0x13b41e44,
+             0xafe609c3, 0xa2bca416, 0xf111821f, 0xfb1264b4, 0x91bac974,
+             0xd6c7d6ab, 0x8e48ff35, 0x4419bd43, 0xc4a65665, 0x685e5510,
+             0x33554c36, 0xab498697, 0x0dbd21fe, 0x3cfe491d, 0x982da466,
+             0xcbea4ca7, 0x9e110c7b, 0x79c56b8a, 0x5fc5a047, 0x84d80e2e,
+             0x1aa9f444, 0x730f203c, 0x6a57b1ab, 0xd752f7a6, 0x87a7dc62,
+             0x944545ff, 0x40660460, 0x77c1a42f, 0xc9ac375d, 0xe866d7ef,
+             0x744695f0, 0x81428c85, 0xa1fc6b96, 0xd7917c7b, 0x7bf03c19,
+             0x5b33eb41, 0x5715f791, 0x8f6cae5f, 0xdb0708fd, 0xb125ac8e,
+             0x785ce6b7, 0x56c6815b, 0x6f46eadb, 0x4eeebeee, 0x195355d8,
+             0xa244de3c, 0x9d7389c0, 0x53761abd, 0xcf99d019, 0xde9ec24b,
+             0x0d76ce39, 0x70beb181, 0x2e55ecee, 0xd5f86079, 0xf56d9d4b,
+             0xfb8886fb, 0x13ef5a83, 0x408f43c5, 0x3f3389a4, 0xfad37943,
+             0x58ccf45c, 0xf82df846, 0x415c7f3e, 0x2915e818, 0x8b3d5cf4,
+             0x6a445f27, 0xf8dbb57a, 0xca8f0070, 0x8ad803ec, 0xb2e87c34,
+             0x038f9245, 0xbedd8a6c, 0xc7c9dee0, 0x0eac7d56, 0x2ad3fa14,
+             0xe0de0840, 0xf775677c, 0xf1bd0ad5, 0x92be221e, 0x87fa1fb9,
+             0xce9d04a4, 0xd2c36fa9, 0x3f6f7024, 0xb028af62, 0x907855ee,
+             0xd83e49d6, 0x4efac5dc, 0xe7151aab, 0x77cd8c6b, 0x0a753b7d,
+             0x0af908b4, 0x8c983623, 0xe50f3027, 0x94222771, 0x1d08e2d6,
+             0xf7e928e6, 0xf2ee5ca6, 0x1b61b93c, 0x11eb962b, 0x9648b21c,
+             0xce2bcba1, 0x34f77154, 0x7bbebe30, 0xe526a319, 0x8ce329ac,
+             0xde4a74d2, 0xb5dc53d5, 0x0009e8b3, } },
+    /* 10 ^ 4096 */
+    { 426, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x2a67c001, 0xd4724e8d,
+             0x8efe7ae7, 0xf89a1e90, 0xef084117, 0x54e05154, 0x13b1bb51,
+             0x506be829, 0xfb29b172, 0xe599574e, 0xf0da6146, 0x806c0ed3,
+             0xb86ae5be, 0x45155e93, 0xc0591cc2, 0x7e1e7c34, 0x7c4823da,
+             0x1d1f4cce, 0x9b8ba1e8, 0xd6bfdf75, 0xe341be10, 0xc2dfae78,
+             0x016b67b2, 0x0f237f1a, 0x3dbeabcd, 0xaf6a2574, 0xcab3e6d7,
+             0x142e0e80, 0x61959127, 0x2c234811, 0x87009701, 0xcb4bf982,
+             0xf8169c84, 0x88052f8c, 0x68dde6d4, 0xbc131761, 0xff0b0905,
+             0x54ab9c41, 0x7613b224, 0x1a1c304e, 0x3bfe167b, 0x441c2d47,
+             0x4f6cea9c, 0x78f06181, 0xeb659fb8, 0x30c7ae41, 0x947e0d0e,
+             0xa1ebcad7, 0xd97d9556, 0x2130504d, 0x1a8309cb, 0xf2acd507,
+             0x3f8ec72a, 0xfd82373a, 0x95a842bc, 0x280f4d32, 0xf3618ac0,
+             0x811a4f04, 0x6dc3a5b4, 0xd3967a1b, 0x15b8c898, 0xdcfe388f,
+             0x454eb2a0, 0x8738b909, 0x10c4e996, 0x2bd9cc11, 0x3297cd0c,
+             0x655fec30, 0xae0725b1, 0xf4090ee8, 0x037d19ee, 0x398c6fed,
+             0x3b9af26b, 0xc994a450, 0xb5341743, 0x75a697b2, 0xac50b9c1,
+             0x3ccb5b92, 0xffe06205, 0xa8329761, 0xdfea5242, 0xeb83cadb,
+             0xe79dadf7, 0x3c20ee69, 0x1e0a6817, 0x7021b97a, 0x743074fa,
+             0x176ca776, 0x77fb8af6, 0xeca19beb, 0x92baf1de, 0xaf63b712,
+             0xde35c88b, 0xa4eb8f8c, 0xe137d5e9, 0x40b464a0, 0x87d1cde8,
+             0x42923bbd, 0xcd8f62ff, 0x2e2690f3, 0x095edc16, 0x59c89f1b,
+             0x1fa8fd5d, 0x5138753d, 0x390a2b29, 0x80152f18, 0x2dd8d925,
+             0xf984d83e, 0x7a872e74, 0xc19e1faf, 0xed4d542d, 0xecf9b5d0,
+             0x9462ea75, 0xc53c0adf, 0x0caea134, 0x37a2d439, 0xc8fa2e8a,
+             0x2181327e, 0x6e7bb827, 0x2d240820, 0x50be10e0, 0x5893d4b8,
+             0xab312bb9, 0x1f2b2322, 0x440b3f25, 0xbf627ede, 0x72dac789,
+             0xb608b895, 0x78787e2a, 0x86deb3f0, 0x6fee7aab, 0xbb9373f4,
+             0x27ecf57b, 0xf7d8b57e, 0xfca26a9f, 0x3d04e8d2, 0xc9df13cb,
+             0x3172826a, 0xcd9e8d7c, 0xa8fcd8e0, 0xb2c39497, 0x307641d9,
+             0x1cc939c1, 0x2608c4cf, 0xb6d1c7bf, 0x3d326a7e, 0xeeaf19e6,
+             0x8e13e25f, 0xee63302b, 0x2dfe6d97, 0x25971d58, 0xe41d3cc4,
+             0x0a80627c, 0xab8db59a, 0x9eea37c8, 0xe90afb77, 0x90ca19cf,
+             0x9ee3352c, 0x3613c850, 0xfe78d682, 0x788f6e50, 0x5b060904,
+             0xb71bd1a4, 0x3fecb534, 0xb32c450c, 0x20c33857, 0xa6e9cfda,
+             0x0239f4ce, 0x48497187, 0xa19adb95, 0xb492ed8a, 0x95aca6a8,
+             0x4dcd6cd9, 0xcf1b2350, 0xfbe8b12a, 0x1a67778c, 0x38eb3acc,
+             0xc32da383, 0xfb126ab1, 0xa03f40a8, 0xed5bf546, 0xe9ce4724,
+             0x4c4a74fd, 0x73a130d8, 0xd9960e2d, 0xa2ebd6c1, 0x94ab6feb,
+             0x6f233b7c, 0x49126080, 0x8e7b9a73, 0x4b8c9091, 0xd298f999,
+             0x35e836b5, 0xa96ddeff, 0x96119b31, 0x6b0dd9bc, 0xc6cc3f8d,
+             0x282566fb, 0x72b882e7, 0xd6769f3b, 0xa674343d, 0x00fc509b,
+             0xdcbf7789, 0xd6266a3f, 0xae9641fd, 0x4e89541b, 0x11953407,
+             0x53400d03, 0x8e0dd75a, 0xe5b53345, 0x108f19ad, 0x108b89bc,
+             0x41a4c954, 0xe03b2b63, 0x437b3d7f, 0x97aced8e, 0xcbd66670,
+             0x2c5508c2, 0x650ebc69, 0x5c4f2ef0, 0x904ff6bf, 0x9985a2df,
+             0x9faddd9e, 0x5ed8d239, 0x25585832, 0xe3e51cb9, 0x0ff4f1d4,
+             0x56c02d9a, 0x8c4ef804, 0xc1a08a13, 0x13fd01c8, 0xe6d27671,
+             0xa7c234f4, 0x9d0176cc, 0xd0d73df2, 0x4d8bfa89, 0x544f10cd,
+             0x2b17e0b2, 0xb70a5c7d, 0xfd86fe49, 0xdf373f41, 0x214495bb,
+             0x84e857fd, 0x00d313d5, 0x0496fcbe, 0xa4ba4744, 0xe8cac982,
+             0xaec29e6e, 0x87ec7038, 0x7000a519, 0xaeee333b, 0xff66e42c,
+             0x8afd6b25, 0x03b4f63b, 0xbd7991dc, 0x5ab8d9c7, 0x2ed4684e,
+             0x48741a6c, 0xaf06940d, 0x2fdc6349, 0xb03d7ecd, 0xe974996f,
+             0xac7867f9, 0x52ec8721, 0xbcdd9d4a, 0x8edd2d00, 0x3557de06,
+             0x41c759f8, 0x3956d4b9, 0xa75409f2, 0x123cd8a1, 0xb6100fab,
+             0x3e7b21e2, 0x2e8d623b, 0x92959da2, 0xbca35f77, 0x200c03a5,
+             0x35fcb457, 0x1bb6c6e4, 0xf74eb928, 0x3d5d0b54, 0x87cc1d21,
+             0x4964046f, 0x18ae4240, 0xd868b275, 0x8bd2b496, 0x1c5563f4,
+             0xc234d8f5, 0xf868e970, 0xf9151fff, 0xae7be4a2, 0x271133ee,
+             0xbb0fd922, 0x25254932, 0xa60a9fc0, 0x104bcd64, 0x30290145,
+             0x00000062, } },
+};
+
+static int
+BigInt_IsZero(const BigInt *i)
+{
+    return !i->length;  /* zero is represented by an empty block list */
+}
+
+/*
+ * Returns 1 if the value is even (zero counts as even), 0 otherwise
+ */
+static int
+BigInt_IsEven(const BigInt *i)
+{
+    return (i->length == 0) || ((i->blocks[0] & 1u) == 0);
+}
+
+static void
+BigInt_Copy(BigInt *dst, const BigInt *src)
+{
+    /* copy only the blocks in use, then publish the new length */
+    const npy_uint32 count = src->length;
+    npy_uint32 idx;
+    for (idx = 0; idx < count; ++idx) {
+        dst->blocks[idx] = src->blocks[idx];
+    }
+    dst->length = count;
+}
+
+/*
+ * result = result << shift (in place, blocks processed from high to low)
+ */
+static void
+BigInt_ShiftLeft(BigInt *result, npy_uint32 shift)
+{
+    npy_uint32 shiftBlocks = shift / 32;
+    npy_uint32 shiftBits = shift % 32;
+    npy_int32 inLength = result->length;
+
+    DEBUG_ASSERT(inLength + shiftBlocks < c_BigInt_MaxBlocks);
+    DEBUG_ASSERT(shift != 0);
+
+    /* 0 << shift == 0: keep length == 0 and never index blocks[-1] below */
+    if (inLength == 0) {
+        return;
+    }
+
+    /* check if the shift is block aligned */
+    if (shiftBits == 0) {
+        npy_uint32 i;
+
+        /* move only the valid blocks [0, inLength), from high to low */
+        for (i = inLength; i > 0; --i) {
+            result->blocks[i - 1 + shiftBlocks] = result->blocks[i - 1];
+        }
+
+        /* zero the remaining low blocks */
+        for (i = 0; i < shiftBlocks; ++i) {
+            result->blocks[i] = 0;
+        }
+
+        result->length += shiftBlocks;
+    }
+    /* else we need to shift partial blocks */
+    else {
+        npy_uint32 i;
+        npy_int32 inBlockIdx = inLength - 1;
+        npy_uint32 outBlockIdx = inLength + shiftBlocks;
+
+        /* output the initial blocks */
+        const npy_uint32 lowBitsShift = (32 - shiftBits);
+        npy_uint32 highBits = 0;
+        npy_uint32 block = result->blocks[inBlockIdx];
+        npy_uint32 lowBits = block >> lowBitsShift;
+
+        /* set the length to hold the shifted blocks */
+        DEBUG_ASSERT(outBlockIdx < c_BigInt_MaxBlocks);
+        result->length = outBlockIdx + 1;
+
+        while (inBlockIdx > 0) {
+            result->blocks[outBlockIdx] = highBits | lowBits;
+            highBits = block << shiftBits;
+
+            --inBlockIdx;
+            --outBlockIdx;
+
+            block = result->blocks[inBlockIdx];
+            lowBits = block >> lowBitsShift;
+        }
+
+        /* output the final blocks */
+        DEBUG_ASSERT(outBlockIdx == shiftBlocks + 1);
+        result->blocks[outBlockIdx] = highBits | lowBits;
+        result->blocks[outBlockIdx-1] = block << shiftBits;
+
+        /* zero the remaining low blocks */
+        for (i = 0; i < shiftBlocks; ++i) {
+            result->blocks[i] = 0;
+        }
+
+        /* check if the terminating block has no set bits */
+        if (result->blocks[result->length - 1] == 0) {
+            --result->length;
+        }
+    }
+}
+
+static void
+BigInt_Set_uint32(BigInt *i, npy_uint32 val)
+{
+    /* zero is stored as an empty block list; block 0 is left untouched */
+    if (val == 0) {
+        i->length = 0;
+        return;
+    }
+    i->blocks[0] = val;
+    i->length = 1;
+}
+
+/* result = 2^exponent */
+static inline void
+BigInt_Pow2(BigInt *result, npy_uint32 exponent)
+{
+    /* split the exponent into a block index and a bit within that block */
+    const npy_uint32 topBlock = exponent / 32;
+    const npy_uint32 topBit = exponent % 32;
+    npy_uint32 n;
+
+    DEBUG_ASSERT(topBlock < c_BigInt_MaxBlocks);
+
+    /* every block below the top one is zero */
+    for (n = 0; n < topBlock; ++n) {
+        result->blocks[n] = 0;
+    }
+    /* the top block carries the single set bit */
+    result->blocks[topBlock] = (npy_uint32)1 << topBit;
+    result->length = topBlock + 1;
+}
+
+/*
+ * Set i to the 128-bit value whose upper 64 bits are hi and whose lower
+ * 64 bits are lo. The value is stored as 32-bit blocks, least significant
+ * first; length counts the blocks up to the highest non-zero one (0 when
+ * both halves are zero).
+ */
+static void
+BigInt_Set_2x_uint64(BigInt *i, npy_uint64 hi, npy_uint64 lo)
+{
+    npy_uint32 parts[4];
+    npy_uint32 used = 4;
+    npy_uint32 n;
+
+    /* split hi:lo into four 32-bit pieces, lowest first */
+    parts[0] = (npy_uint32)(lo & bitmask_u64(32));
+    parts[1] = (npy_uint32)((lo >> 32) & bitmask_u64(32));
+    parts[2] = (npy_uint32)(hi & bitmask_u64(32));
+    parts[3] = (npy_uint32)((hi >> 32) & bitmask_u64(32));
+
+    /* leading zero pieces are not part of the number */
+    while (used > 0 && parts[used - 1] == 0) {
+        --used;
+    }
+
+    /* store only the pieces in use; higher blocks are left untouched */
+    for (n = 0; n < used; ++n) {
+        i->blocks[n] = parts[n];
+    }
+
+    i->length = used;
+}
+
+/* result = lhs * rhs, where rhs is a single 32-bit block */
+static void
+BigInt_Multiply_int(BigInt *result, const BigInt *lhs, npy_uint32 rhs)
+{
+    /* schoolbook multiplication, one block at a time, low to high */
+    const npy_uint32 count = lhs->length;
+    npy_uint32 carry = 0;
+    npy_uint32 idx;
+
+    for (idx = 0; idx < count; ++idx) {
+        /* cannot overflow: (2^32-1)^2 + (2^32-1) < 2^64 */
+        npy_uint64 wide = (npy_uint64)lhs->blocks[idx] * rhs + carry;
+        result->blocks[idx] = (npy_uint32)(wide & bitmask_u64(32));
+        carry = wide >> 32;
+    }
+
+    /* a non-zero final carry becomes one extra high block */
+    if (carry == 0) {
+        result->length = count;
+        return;
+    }
+
+    DEBUG_ASSERT(lhs->length + 1 <= c_BigInt_MaxBlocks);
+    result->blocks[count] = carry;
+    result->length = count + 1;
+}
+
+/*
+ * result = lhs * rhs (result must not alias lhs or rhs)
+ */
+static void
+BigInt_Multiply(BigInt *result, const BigInt *lhs, const BigInt *rhs)
+{
+    const BigInt *large;
+    const BigInt *small;
+    npy_uint32 maxResultLen;
+    npy_uint32 *cur, *end, *resultStart;
+    const npy_uint32 *smallCur;
+
+    DEBUG_ASSERT(result != lhs && result != rhs);
+
+    /* determine which operand has the smaller length */
+    if (lhs->length < rhs->length) {
+        small = lhs;
+        large = rhs;
+    }
+    else {
+        small = rhs;
+        large = lhs;
+    }
+
+    /* set the maximum possible result length */
+    maxResultLen = large->length + small->length;
+    DEBUG_ASSERT(maxResultLen <= c_BigInt_MaxBlocks);
+
+    /* clear the result data */
+    for (cur = result->blocks, end = cur + maxResultLen; cur != end; ++cur) {
+        *cur = 0;
+    }
+
+    /* perform standard long multiplication for each small block */
+    resultStart = result->blocks;
+    for (smallCur = small->blocks;
+            smallCur != small->blocks + small->length;
+            ++smallCur, ++resultStart) {
+        /*
+         * if non-zero, multiply against all the large blocks and add into the
+         * result
+         */
+        const npy_uint32 multiplier = *smallCur;
+        if (multiplier != 0) {
+            const npy_uint32 *largeCur = large->blocks;
+            npy_uint32 *resultCur = resultStart;
+            npy_uint64 carry = 0;
+            do {
+                npy_uint64 product = (*resultCur) +
+                                     (*largeCur)*(npy_uint64)multiplier + carry;
+                carry = product >> 32;
+                *resultCur = product & bitmask_u64(32);
+                ++largeCur;
+                ++resultCur;
+            } while(largeCur != large->blocks + large->length);
+            /* block is still zero here, so the carry is stored, not added */
+            DEBUG_ASSERT(resultCur < result->blocks + maxResultLen);
+            *resultCur = (npy_uint32)(carry & bitmask_u64(32));
+        }
+    }
+
+    /* check if the terminating block has no set bits */
+    if (maxResultLen > 0 && result->blocks[maxResultLen - 1] == 0) {
+        result->length = maxResultLen-1;
+    }
+    else {
+        result->length = maxResultLen;
+    }
+}
+
+/*
+ * in = in * 10^exponent
+ *
+ * temp is scratch space provided by the caller; its contents are
+ * unspecified on return.
+ */
+static void
+BigInt_MultiplyPow10(BigInt *in, npy_uint32 exponent, BigInt *temp)
+{
+    /* ping-pong between in and temp to reduce large integer copies */
+    BigInt *cur, *spare;
+    const npy_uint32 lowBits = exponent & bitmask_u32(3);
+    npy_uint32 highBits = exponent >> 3;
+    npy_uint32 tableIdx;
+
+    /* make sure the exponent is within the bounds of the lookup table data */
+    DEBUG_ASSERT(exponent < 8192);
+
+    /*
+     * the low 3 bits of the exponent select a 32-bit power of 10 that is
+     * applied with a single-block multiply
+     */
+    if (lowBits == 0) {
+        cur = in;
+        spare = temp;
+    }
+    else {
+        BigInt_Multiply_int(temp, in, g_PowerOf10_U32[lowBits]);
+        cur = temp;
+        spare = in;
+    }
+
+    /*
+     * each remaining bit k of the exponent (from bit 3 up) is a factor of
+     * 10^(8 << k), which is entry k of the big power-of-10 table
+     */
+    for (tableIdx = 0; highBits != 0; ++tableIdx, highBits >>= 1) {
+        BigInt *swap;
+
+        if ((highBits & 1) == 0) {
+            continue;
+        }
+
+        /* multiply into the spare buffer, then trade roles */
+        BigInt_Multiply(spare, cur, &g_PowerOf10_Big[tableIdx]);
+        swap = cur;
+        cur = spare;
+        spare = swap;
+    }
+
+    /* output the result */
+    if (cur != in) {
+        BigInt_Copy(in, cur);
+    }
+}
+
+/*
+ * result = 10^exponent
+ *
+ * temp is scratch space provided by the caller; its contents are
+ * unspecified on return.
+ */
+static void
+BigInt_Pow10(BigInt *result, npy_uint32 exponent, BigInt *temp)
+{
+    /* ping-pong between result and temp to reduce large integer copies */
+    BigInt *cur = result;
+    BigInt *spare = temp;
+    const npy_uint32 lowBits = exponent & bitmask_u32(3);
+    npy_uint32 highBits = exponent >> 3;
+    npy_uint32 tableIdx;
+
+    /* make sure the exponent is within the bounds of the lookup table data */
+    DEBUG_ASSERT(exponent < 8192);
+
+    /*
+     * seed the result with the 32-bit power of 10 selected by the low 3 bits
+     * of the exponent
+     */
+    BigInt_Set_uint32(cur, g_PowerOf10_U32[lowBits]);
+
+    /*
+     * each remaining bit k of the exponent (from bit 3 up) is a factor of
+     * 10^(8 << k), which is entry k of the big power-of-10 table
+     */
+    for (tableIdx = 0; highBits != 0; ++tableIdx, highBits >>= 1) {
+        BigInt *swap;
+
+        if ((highBits & 1) == 0) {
+            continue;
+        }
+
+        /* multiply into the spare buffer, then trade roles */
+        BigInt_Multiply(spare, cur, &g_PowerOf10_Big[tableIdx]);
+        swap = cur;
+        cur = spare;
+        spare = swap;
+    }
+
+    /* output the result */
+    if (cur != result) {
+        BigInt_Copy(result, cur);
+    }
+}
+
+/* result = lhs + rhs */
+static void
+BigInt_Add(BigInt *result, const BigInt *lhs, const BigInt *rhs)
+{
+    /* determine which operand has the smaller length */
+    const BigInt *large, *small;
+    npy_uint64 carry = 0;
+    const npy_uint32 *largeCur, *smallCur, *largeEnd, *smallEnd;
+    npy_uint32 *resultCur;
+
+    if (lhs->length < rhs->length) {
+        small = lhs;
+        large = rhs;
+    }
+    else {
+        small = rhs;
+        large = lhs;
+    }
+
+    /* The output will be at least as long as the largest input */
+    result->length = large->length;
+    /* NOTE(review): if result == small, small->length was just overwritten */
+    /* Add each block and carry the overflow to the next block */
+    largeCur  = large->blocks;
+    largeEnd  = largeCur + large->length;
+    smallCur  = small->blocks;
+    smallEnd  = smallCur + small->length;
+    resultCur = result->blocks;
+    while (smallCur != smallEnd) {
+        npy_uint64 sum = carry + (npy_uint64)(*largeCur) +
+                                 (npy_uint64)(*smallCur);
+        carry = sum >> 32;
+        *resultCur = sum & bitmask_u64(32);
+        ++largeCur;
+        ++smallCur;
+        ++resultCur;
+    }
+
+    /* Add the carry to any blocks that only exist in the large operand */
+    while (largeCur != largeEnd) {
+        npy_uint64 sum = carry + (npy_uint64)(*largeCur);
+        carry = sum >> 32;
+        (*resultCur) = sum & bitmask_u64(32);
+        ++largeCur;
+        ++resultCur;
+    }
+
+    /* If there's still a carry, append a new block */
+    if (carry != 0) {
+        DEBUG_ASSERT(carry == 1);
+        DEBUG_ASSERT((npy_uint32)(resultCur - result->blocks) ==
+               large->length && (large->length < c_BigInt_MaxBlocks));
+        *resultCur = 1;
+        result->length = large->length + 1;
+    }
+    else {
+        result->length = large->length;
+    }
+}
+
+/* result = in * 2 */
+static void
+BigInt_Multiply2(BigInt *result, const BigInt *in)
+{
+    /* a one-bit left shift; the top bit of each block moves up as carry */
+    const npy_uint32 count = in->length;
+    npy_uint32 carry = 0;
+    npy_uint32 idx;
+
+    for (idx = 0; idx < count; ++idx) {
+        const npy_uint32 block = in->blocks[idx];
+
+        result->blocks[idx] = (block << 1) | carry;
+        carry = block >> 31;
+    }
+
+    /* a carry out of the top block adds one more block */
+    if (carry == 0) {
+        result->length = count;
+        return;
+    }
+
+    DEBUG_ASSERT(in->length + 1 <= c_BigInt_MaxBlocks);
+    result->blocks[count] = carry;
+    result->length = count + 1;
+}
+
+/* result = result * 2 */
+static void
+BigInt_Multiply2_inplace(BigInt *result)
+{
+    /* a one-bit left shift; the top bit of each block moves up as carry */
+    npy_uint32 carry = 0;
+    npy_uint32 idx;
+
+    for (idx = 0; idx < result->length; ++idx) {
+        const npy_uint32 block = result->blocks[idx];
+
+        result->blocks[idx] = (block << 1) | carry;
+        carry = block >> 31;
+    }
+
+    /* a carry out of the top block adds one more block */
+    if (carry != 0) {
+        DEBUG_ASSERT(result->length + 1 <= c_BigInt_MaxBlocks);
+        result->blocks[result->length] = carry;
+        ++result->length;
+    }
+}
+
+/*
+ * Returns a negative, zero or positive value as lhs is <, == or > rhs.
+ */
+static npy_int32
+BigInt_Compare(const BigInt *lhs, const BigInt *rhs)
+{
+    /* A bigger length implies a bigger number. */
+    const npy_int32 lengthDiff = lhs->length - rhs->length;
+    npy_uint32 n;
+
+    if (lengthDiff != 0) {
+        return lengthDiff;
+    }
+
+    /* Same length: the highest differing block decides. */
+    for (n = lhs->length; n > 0; --n) {
+        const npy_uint32 a = lhs->blocks[n - 1];
+        const npy_uint32 b = rhs->blocks[n - 1];
+
+        if (a != b) {
+            return (a > b) ? 1 : -1;
+        }
+    }
+
+    /* no blocks differed */
+    return 0;
+}
+
+static npy_uint32
+BigInt_DivideWithRemainder_MaxQuotient9(BigInt *dividend, const BigInt *divisor)
+{
+    npy_uint32 length, quotient;
+    const npy_uint32 *finalDivisorBlock;
+    npy_uint32 *finalDividendBlock;
+    /*
+     * Returns dividend / divisor and leaves the remainder in dividend. The
+     * divisor must have been shifted into range and must not be shorter than
+     * the dividend (see the assert below).
+     */
+    DEBUG_ASSERT(!divisor->length == 0 &&
+                divisor->blocks[divisor->length-1] >= 8 &&
+                divisor->blocks[divisor->length-1] < bitmask_u64(32) &&
+                dividend->length <= divisor->length);
+
+    /*
+     * If the dividend is smaller than the divisor, the quotient is zero and the
+     * dividend is already the remainder.
+     */
+    length = divisor->length;
+    if (dividend->length < divisor->length) {
+        return 0;
+    }
+
+    finalDivisorBlock = divisor->blocks + length - 1;
+    finalDividendBlock = dividend->blocks + length - 1;
+
+    /*
+     * Compute an estimated quotient based on the high block value. This will
+     * either match the actual quotient or undershoot by one.
+     */
+    quotient = *finalDividendBlock / (*finalDivisorBlock + 1);
+    DEBUG_ASSERT(quotient <= 9);
+
+    /* Divide out the estimated quotient */
+    if (quotient != 0) {
+        /* dividend = dividend - divisor*quotient */
+        const npy_uint32 *divisorCur = divisor->blocks;
+        npy_uint32 *dividendCur = dividend->blocks;
+
+        npy_uint64 borrow = 0;
+        npy_uint64 carry = 0;
+        do {
+            npy_uint64 difference, product;
+
+            product = (npy_uint64)*divisorCur * (npy_uint64)quotient + carry;
+            carry = product >> 32;
+
+            difference = (npy_uint64)*dividendCur
+                       - (product & bitmask_u64(32)) - borrow;
+            borrow = (difference >> 32) & 1;
+
+            *dividendCur = difference & bitmask_u64(32);
+
+            ++divisorCur;
+            ++dividendCur;
+        } while(divisorCur <= finalDivisorBlock);
+
+        /* remove all leading zero blocks from dividend */
+        while (length > 0 && dividend->blocks[length - 1] == 0) {
+            --length;
+        }
+
+        dividend->length = length;
+    }
+
+    /*
+     * If the dividend is still at least as large as the divisor, our estimated
+     * quotient undershot. To correct, we increment the quotient and subtract
+     * one more divisor from the dividend.
+     */
+    if (BigInt_Compare(dividend, divisor) >= 0) {
+        /* dividend = dividend - divisor */
+        const npy_uint32 *divisorCur = divisor->blocks;
+        npy_uint32 *dividendCur = dividend->blocks;
+        npy_uint64 borrow = 0;
+
+        ++quotient;
+
+        do {
+            npy_uint64 difference = (npy_uint64)*dividendCur
+                                  - (npy_uint64)*divisorCur - borrow;
+            borrow = (difference >> 32) & 1;
+
+            *dividendCur = difference & bitmask_u64(32);
+
+            ++divisorCur;
+            ++dividendCur;
+        } while(divisorCur <= finalDivisorBlock);
+
+        /* remove all leading zero blocks from dividend */
+        while (length > 0 && dividend->blocks[length - 1] == 0) {
+            --length;
+        }
+
+        dividend->length = length;
+    }
+
+    return quotient;
+}
+
+/*
+ * Return floor(log2(val)) for a 32-bit value (0 is returned for val == 0).
+ *
+ * The value is examined one byte at a time starting from the most
+ * significant byte; the first non-zero byte is resolved through a 256-entry
+ * lookup table and offset by its bit position.
+ */
+static npy_uint32
+LogBase2_32(npy_uint32 val)
+{
+    /* byteLog2[b] is floor(log2(b)) for b in 1..255, and 0 for b == 0 */
+    static const npy_uint8 byteLog2[256] =
+    {
+        0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
+        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+    };
+
+    npy_uint32 shift;
+
+    /* probe bytes 3, 2 and 1 in turn; the first populated one decides */
+    for (shift = 24; shift != 0; shift -= 8) {
+        const npy_uint32 upper = val >> shift;
+
+        if (upper != 0) {
+            return shift + byteLog2[upper];
+        }
+    }
+
+    /* every higher byte is zero, so val itself indexes the table */
+    return byteLog2[val];
+}
+
+/*
+ * Return floor(log2(val)) for a 64-bit value by delegating to LogBase2_32 on
+ * whichever 32-bit half holds the highest set bit.
+ */
+static npy_uint32
+LogBase2_64(npy_uint64 val)
+{
+    const npy_uint32 upperHalf = (npy_uint32)(val >> 32);
+
+    /* the low half only matters when the high half is empty */
+    return (upperHalf != 0) ? 32 + LogBase2_32(upperHalf)
+                            : LogBase2_32((npy_uint32)val);
+}
+
+/*
+ * Return floor(log2(v)) for a 128-bit value supplied as two 64-bit halves
+ * (hi holds bits 64..127, lo holds bits 0..63).
+ */
+static npy_uint32
+LogBase2_128(npy_uint64 hi, npy_uint64 lo)
+{
+    /* the low half only matters when the high half is empty */
+    return (hi != 0) ? 64 + LogBase2_64(hi) : LogBase2_64(lo);
+}
+
+/*
+ * Write the text for an infinity or NaN into buffer.
+ *
+ * Arguments:
+ *   buffer           - destination; always NUL-terminated when bufferSize > 0
+ *   bufferSize       - size of buffer in bytes, including the terminator
+ *   mantissa         - 0 selects "inf", any other value selects "nan"
+ *   mantissaHexWidth - currently unused (kept for the disabled hex payload
+ *                      output described below)
+ *   signbit          - '+' or '-' to prefix infinities with that sign; any
+ *                      other value prints no sign. NaN never gets a sign.
+ *
+ * The text is truncated to fit the buffer. The sign is only emitted when at
+ * least one character of "inf" still fits after it.
+ *
+ * Returns the number of characters written, excluding the terminator.
+ */
+static npy_uint32
+PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa,
+            npy_uint32 mantissaHexWidth, char signbit)
+{
+    const char *text;
+    npy_uint32 maxPrintLen, printLen;
+    npy_uint32 pos = 0;
+
+    (void)mantissaHexWidth;
+
+    DEBUG_ASSERT(bufferSize > 0);
+
+    /* without room for even the terminator nothing can be written safely */
+    if (bufferSize == 0) {
+        return 0;
+    }
+    maxPrintLen = bufferSize - 1;
+
+    /* Check for infinity */
+    if (mantissa == 0) {
+        /*
+         * only print sign for inf values (though nan can have a sign set).
+         * The test is written as pos + 1 < maxPrintLen rather than
+         * pos < maxPrintLen - 1 so that it cannot wrap around when
+         * maxPrintLen is 0.
+         */
+        if ((signbit == '+' || signbit == '-') && pos + 1 < maxPrintLen) {
+            buffer[pos++] = signbit;
+        }
+        text = "inf";
+    }
+    else {
+        /*
+         *  For numpy we ignore unusual mantissa values for nan, but keep this
+         *  code in case we change our mind later.
+         *
+         * // append HEX value
+         * if (maxPrintLen > 3) {
+         *     printLen += PrintHex(buffer+3, bufferSize-3, mantissa,
+         *                          mantissaHexWidth);
+         * }
+         */
+        text = "nan";
+    }
+
+    /* copy and make sure the buffer is terminated (pos <= maxPrintLen here) */
+    printLen = (3 < maxPrintLen - pos) ? 3 : maxPrintLen - pos;
+    memcpy(buffer + pos, text, printLen);
+    buffer[pos + printLen] = '\0';
+
+    return pos + printLen;
+}
+
+/*
+ * Core digit generator: converts the value mantissa * 2^exponent into a run
+ * of decimal digit characters.
+ *
+ * Arguments:
+ *   bigints           - scratch array; entries [0]..[6] are used. Entry [0]
+ *                       must hold the mantissa on entry, the rest are
+ *                       overwritten as working storage.
+ *   exponent          - base-2 exponent applied to the mantissa
+ *   mantissaBit       - used as floor(log2(mantissa)) when estimating the
+ *                       decimal exponent
+ *   hasUnequalMargins - when true the upper margin is tracked separately from
+ *                       the lower margin (set to twice its value); otherwise
+ *                       both margins share one BigInt
+ *   digitMode         - DigitMode_Unique stops once the digits identify the
+ *                       value within its margins; any other mode prints until
+ *                       the remainder is zero or the cutoff is reached
+ *   cutoffMode        - CutoffMode_TotalLength counts cutoffs from the first
+ *                       digit; otherwise they count digits past the decimal
+ *                       point
+ *   cutoff_max        - maximum digits to emit; negative means limited only
+ *                       by bufferSize
+ *   cutoff_min        - minimum digits before the unique-mode early exit is
+ *                       allowed; negative disables it. Not consulted in the
+ *                       non-unique loop.
+ *   pOutBuffer        - receives the digit characters; no terminating NUL is
+ *                       written by this function
+ *   bufferSize        - capacity of pOutBuffer, must be > 0
+ *   pOutExponent      - receives the base-10 exponent of the first digit
+ *
+ * Returns the number of digit characters written.
+ */
+static npy_uint32
+Dragon4(BigInt *bigints, const npy_int32 exponent,
+        const npy_uint32 mantissaBit, const npy_bool hasUnequalMargins,
+        const DigitMode digitMode, const CutoffMode cutoffMode,
+        npy_int32 cutoff_max, npy_int32 cutoff_min, char *pOutBuffer,
+        npy_uint32 bufferSize, npy_int32 *pOutExponent)
+{
+    char *curDigit = pOutBuffer;
+
+    /*
+     * We compute values in integer format by rescaling as
+     *   mantissa = scaledValue / scale
+     *   marginLow = scaledMarginLow / scale
+     *   marginHigh = scaledMarginHigh / scale
+     * Here, marginLow and marginHigh represent 1/2 of the distance to the next
+     * floating point value above/below the mantissa.
+     *
+     * scaledMarginHigh will point to scaledMarginLow in the case they must be
+     * equal to each other, otherwise it will point to optionalMarginHigh.
+     */
+    BigInt *mantissa = &bigints[0];  /* the only initialized bigint */
+    BigInt *scale = &bigints[1];
+    BigInt *scaledValue = &bigints[2];
+    BigInt *scaledMarginLow = &bigints[3];
+    BigInt *scaledMarginHigh;
+    BigInt *optionalMarginHigh = &bigints[4];
+
+    BigInt *temp1 = &bigints[5];
+    BigInt *temp2 = &bigints[6];
+
+    const npy_float64 log10_2 = 0.30102999566398119521373889472449;
+    npy_int32 digitExponent, hiBlock;
+    npy_int32 cutoff_max_Exponent, cutoff_min_Exponent;
+    npy_uint32 outputDigit;    /* current digit being output */
+    npy_uint32 outputLen;
+    /* an even mantissa lets the margin comparisons below be inclusive */
+    npy_bool isEven = BigInt_IsEven(mantissa);
+    npy_int32 cmp;
+
+    /* values used to determine how to round */
+    npy_bool low, high, roundDown;
+
+    DEBUG_ASSERT(bufferSize > 0);
+
+    /* if the mantissa is zero, the value is zero regardless of the exponent */
+    if (BigInt_IsZero(mantissa)) {
+        *curDigit = '0';
+        *pOutExponent = 0;
+        return 1;
+    }
+
+    BigInt_Copy(scaledValue, mantissa);
+
+    if (hasUnequalMargins) {
+        /* if we have no fractional component */
+        if (exponent > 0) {
+            /*
+             * 1) Expand the input value by multiplying out the mantissa and
+             *    exponent. This represents the input value in its whole number
+             *    representation.
+             * 2) Apply an additional scale of 2 such that later comparisons
+             *    against the margin values are simplified.
+             * 3) Set the margin value to the lowest mantissa bit's scale.
+             */
+
+            /* scaledValue      = 2 * 2 * mantissa*2^exponent */
+            BigInt_ShiftLeft(scaledValue, exponent + 2);
+            /* scale            = 2 * 2 * 1 */
+            BigInt_Set_uint32(scale,  4);
+            /* scaledMarginLow  = 2 * 2^(exponent-1) */
+            BigInt_Pow2(scaledMarginLow, exponent);
+            /* scaledMarginHigh = 2 * 2 * 2^(exponent-1) */
+            BigInt_Pow2(optionalMarginHigh, exponent + 1);
+        }
+        /* else we have a fractional exponent */
+        else {
+            /*
+             * In order to track the mantissa data as an integer, we store it as
+             * is with a large scale
+             */
+
+            /* scaledValue      = 2 * 2 * mantissa */
+            BigInt_ShiftLeft(scaledValue, 2);
+            /* scale            = 2 * 2 * 2^(-exponent) */
+            BigInt_Pow2(scale, -exponent + 2);
+            /* scaledMarginLow  = 2 * 2^(-1) */
+            BigInt_Set_uint32(scaledMarginLow, 1);
+            /* scaledMarginHigh = 2 * 2 * 2^(-1) */
+            BigInt_Set_uint32(optionalMarginHigh, 2);
+        }
+
+        /* the high and low margins are different */
+        scaledMarginHigh = optionalMarginHigh;
+    }
+    else {
+        /* if we have no fractional component */
+        if (exponent > 0) {
+            /* scaledValue     = 2 * mantissa*2^exponent */
+            BigInt_ShiftLeft(scaledValue, exponent + 1);
+            /* scale           = 2 * 1 */
+            BigInt_Set_uint32(scale, 2);
+            /* scaledMarginLow = 2 * 2^(exponent-1) */
+            BigInt_Pow2(scaledMarginLow, exponent);
+        }
+        /* else we have a fractional exponent */
+        else {
+            /*
+             * In order to track the mantissa data as an integer, we store it as
+             * is with a large scale
+             */
+
+            /* scaledValue     = 2 * mantissa */
+            BigInt_ShiftLeft(scaledValue, 1);
+            /* scale           = 2 * 2^(-exponent) */
+            BigInt_Pow2(scale, -exponent + 1);
+            /* scaledMarginLow = 2 * 2^(-1) */
+            BigInt_Set_uint32(scaledMarginLow, 1);
+        }
+
+        /* the high and low margins are equal */
+        scaledMarginHigh = scaledMarginLow;
+    }
+
+    /*
+     * Compute an estimate for digitExponent that will be correct or undershoot
+     * by one.  This optimization is based on the paper "Printing Floating-Point
+     * Numbers Quickly and Accurately" by Burger and Dybvig
+     * https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.72.4656
+     * We perform an additional subtraction of 0.69 to increase the frequency of
+     * a failed estimate because that lets us take a faster branch in the code.
+     * 0.69 is chosen because 0.69 + log10(2) is less than one by a reasonable
+     * epsilon that will account for any floating point error.
+     *
+     * We want to set digitExponent to floor(log10(v)) + 1
+     *  v = mantissa*2^exponent
+     *  log2(v) = log2(mantissa) + exponent;
+     *  log10(v) = log2(v) * log10(2)
+     *  floor(log2(v)) = mantissaBit + exponent;
+     *  log10(v) - log10(2) < (mantissaBit + exponent) * log10(2) <= log10(v)
+     *  log10(v) < (mantissaBit + exponent) * log10(2) + log10(2)
+     *                                                 <= log10(v) + log10(2)
+     *  floor(log10(v)) < ceil((mantissaBit + exponent) * log10(2))
+     *                                                 <= floor(log10(v)) + 1
+     *
+     *  Warning: This calculation assumes npy_float64 is an IEEE-binary64
+     *  float. This line may need to be updated if this is not the case.
+     */
+    digitExponent = (npy_int32)(
+       ceil((npy_float64)((npy_int32)mantissaBit + exponent) * log10_2 - 0.69));
+
+    /*
+     * if the digit exponent is smaller than the smallest desired digit for
+     * fractional cutoff, pull the digit back into legal range at which point we
+     * will round to the appropriate value.  Note that while our value for
+     * digitExponent is still an estimate, this is safe because it only
+     * increases the number. This will either correct digitExponent to an
+     * accurate value or it will clamp it above the accurate value.
+     */
+    if (cutoff_max >= 0 && cutoffMode == CutoffMode_FractionLength &&
+            digitExponent <= -cutoff_max) {
+        digitExponent = -cutoff_max + 1;
+    }
+
+
+    /* Divide value by 10^digitExponent. */
+    if (digitExponent > 0) {
+        /* A positive exponent creates a division so we multiply the scale. */
+        BigInt_MultiplyPow10(scale, digitExponent, temp1);
+    }
+    else if (digitExponent < 0) {
+        /*
+         * A negative exponent creates a multiplication so we multiply up the
+         * scaledValue, scaledMarginLow and scaledMarginHigh.
+         */
+        BigInt *temp=temp1, *pow10=temp2;
+        BigInt_Pow10(pow10, -digitExponent, temp);
+
+        BigInt_Multiply(temp, scaledValue, pow10);
+        BigInt_Copy(scaledValue, temp);
+
+        BigInt_Multiply(temp, scaledMarginLow, pow10);
+        BigInt_Copy(scaledMarginLow, temp);
+
+        /* a separate high margin is re-derived from the low margin */
+        if (scaledMarginHigh != scaledMarginLow) {
+            BigInt_Multiply2(scaledMarginHigh, scaledMarginLow);
+        }
+    }
+
+    /* If (value >= 1), our estimate for digitExponent was too low */
+    if (BigInt_Compare(scaledValue, scale) >= 0) {
+        /*
+         * The exponent estimate was incorrect.
+         * Increment the exponent and don't perform the premultiply needed
+         * for the first loop iteration.
+         */
+        digitExponent = digitExponent + 1;
+    }
+    else {
+        /*
+         * The exponent estimate was correct.
+         * Multiply the value and margins by the output base to prepare for the
+         * first loop iteration.
+         */
+        BigInt_Multiply10(scaledValue);
+        BigInt_Multiply10(scaledMarginLow);
+        if (scaledMarginHigh != scaledMarginLow) {
+            BigInt_Multiply2(scaledMarginHigh, scaledMarginLow);
+        }
+    }
+
+    /*
+     * Compute the cutoff_max exponent (the exponent of the final digit to
+     * print).  Default to the maximum size of the output buffer.
+     */
+    cutoff_max_Exponent = digitExponent - bufferSize;
+    if (cutoff_max >= 0) {
+        npy_int32 desiredCutoffExponent;
+
+        if (cutoffMode == CutoffMode_TotalLength) {
+            desiredCutoffExponent = digitExponent - cutoff_max;
+            if (desiredCutoffExponent > cutoff_max_Exponent) {
+                cutoff_max_Exponent = desiredCutoffExponent;
+            }
+        }
+        /* Otherwise it's CutoffMode_FractionLength. Print cutoff_max digits
+         * past the decimal point or until we reach the buffer size
+         */
+        else {
+            desiredCutoffExponent = -cutoff_max;
+            if (desiredCutoffExponent > cutoff_max_Exponent) {
+                cutoff_max_Exponent = desiredCutoffExponent;
+            }
+        }
+    }
+    /*
+     * Also compute the cutoff_min exponent. It starts at digitExponent, so
+     * with a negative cutoff_min the unique-mode loop may stop at any digit.
+     */
+    cutoff_min_Exponent = digitExponent;
+    if (cutoff_min >= 0) {
+        npy_int32 desiredCutoffExponent;
+
+        if (cutoffMode == CutoffMode_TotalLength) {
+            desiredCutoffExponent = digitExponent - cutoff_min;
+            if (desiredCutoffExponent < cutoff_min_Exponent) {
+                cutoff_min_Exponent = desiredCutoffExponent;
+            }
+        }
+        else {
+            desiredCutoffExponent = -cutoff_min;
+            if (desiredCutoffExponent < cutoff_min_Exponent) {
+                cutoff_min_Exponent = desiredCutoffExponent;
+            }
+        }
+    }
+
+    /* Output the exponent of the first digit we will print */
+    *pOutExponent = digitExponent-1;
+
+    /*
+     * In preparation for calling BigInt_DivideWithRemainder_MaxQuotient9(), we
+     * need to scale up our values such that the highest block of the
+     * denominator is greater than or equal to 8. We also need to guarantee that
+     * the numerator can never have a length greater than the denominator after
+     * each loop iteration.  This requires the highest block of the denominator
+     * to be less than or equal to 429496729 which is the highest number that
+     * can be multiplied by 10 without overflowing to a new block.
+     */
+    DEBUG_ASSERT(scale->length > 0);
+    hiBlock = scale->blocks[scale->length - 1];
+    if (hiBlock < 8 || hiBlock > 429496729) {
+        npy_uint32 hiBlockLog2, shift;
+
+        /*
+         * Perform a bit shift on all values to get the highest block of the
+         * denominator into the range [8,429496729]. We are more likely to make
+         * accurate quotient estimations in
+         * BigInt_DivideWithRemainder_MaxQuotient9() with higher denominator
+         * values so we shift the denominator to place the highest bit at index
+         * 27 of the highest block.  This is safe because (2^28 - 1) = 268435455
+         * which is less than 429496729. This means that all values with a
+         * highest bit at index 27 are within range.
+         */
+        hiBlockLog2 = LogBase2_32(hiBlock);
+        DEBUG_ASSERT(hiBlockLog2 < 3 || hiBlockLog2 > 27);
+        shift = (32 + 27 - hiBlockLog2) % 32;
+
+        BigInt_ShiftLeft(scale, shift);
+        BigInt_ShiftLeft(scaledValue, shift);
+        BigInt_ShiftLeft(scaledMarginLow, shift);
+        if (scaledMarginHigh != scaledMarginLow) {
+            BigInt_Multiply2(scaledMarginHigh, scaledMarginLow);
+        }
+    }
+
+    if (digitMode == DigitMode_Unique) {
+        /*
+         * For the unique cutoff mode, we will try to print until we have
+         * reached a level of precision that uniquely distinguishes this value
+         * from its neighbors. If we run out of space in the output buffer, we
+         * terminate early.
+         */
+        for (;;) {
+            BigInt *scaledValueHigh = temp1;
+
+            digitExponent = digitExponent-1;
+
+            /* divide out the scale to extract the digit */
+            outputDigit =
+                BigInt_DivideWithRemainder_MaxQuotient9(scaledValue, scale);
+            DEBUG_ASSERT(outputDigit < 10);
+
+            /* update the high end of the value */
+            BigInt_Add(scaledValueHigh, scaledValue, scaledMarginHigh);
+
+            /*
+             * stop looping if we are far enough away from our neighboring
+             * values (and we have printed at least the requested minimum
+             * digits) or if we have reached the cutoff digit. The final digit
+             * is left in outputDigit and is emitted by the rounding code
+             * after the loop.
+             */
+            cmp = BigInt_Compare(scaledValue, scaledMarginLow);
+            low = isEven ? (cmp <= 0) : (cmp < 0);
+            cmp = BigInt_Compare(scaledValueHigh, scale);
+            high = isEven ? (cmp >= 0) : (cmp > 0);
+            if (((low | high) & (digitExponent <= cutoff_min_Exponent)) |
+                    (digitExponent == cutoff_max_Exponent)) {
+                break;
+            }
+
+            /* store the output digit */
+            *curDigit = (char)('0' + outputDigit);
+            ++curDigit;
+
+            /* multiply the value and margins by the output base */
+            BigInt_Multiply10(scaledValue);
+            BigInt_Multiply10(scaledMarginLow);
+            if (scaledMarginHigh != scaledMarginLow) {
+                BigInt_Multiply2(scaledMarginHigh, scaledMarginLow);
+            }
+        }
+    }
+    else {
+        /*
+         * For exact digit mode, we will try to print until we
+         * have exhausted all precision (i.e. all remaining digits are zeros) or
+         * until we reach the desired cutoff digit.
+         *
+         * low and high are both cleared, so the rounding code after the loop
+         * always takes the compare-against-one-half path in this mode.
+         */
+        low = NPY_FALSE;
+        high = NPY_FALSE;
+
+        for (;;) {
+            digitExponent = digitExponent-1;
+
+            /* divide out the scale to extract the digit */
+            outputDigit =
+                BigInt_DivideWithRemainder_MaxQuotient9(scaledValue, scale);
+            DEBUG_ASSERT(outputDigit < 10);
+
+            /* a zero-length remainder means no non-zero digits remain */
+            if ((scaledValue->length == 0) |
+                    (digitExponent == cutoff_max_Exponent)) {
+                break;
+            }
+
+            /* store the output digit */
+            *curDigit = (char)('0' + outputDigit);
+            ++curDigit;
+
+            /* multiply the value by the output base */
+            BigInt_Multiply10(scaledValue);
+        }
+    }
+
+    /* default to rounding down the final digit if value got too close to 0 */
+    roundDown = low;
+
+    /* if it is legal to round up and down */
+    if (low == high) {
+        npy_int32 compare;
+
+        /*
+         * round to the closest digit by comparing value with 0.5. To do this we
+         * need to convert the inequality to large integer values.
+         *  compare( value, 0.5 )
+         *  compare( scale * value, scale * 0.5 )
+         *  compare( 2 * scale * value, scale )
+         */
+        BigInt_Multiply2_inplace(scaledValue);
+        compare = BigInt_Compare(scaledValue, scale);
+        roundDown = compare < 0;
+
+        /*
+         * if we are directly in the middle, round towards the even digit (i.e.
+         * IEEE rounding rules)
+         */
+        if (compare == 0) {
+            roundDown = (outputDigit & 1) == 0;
+        }
+    }
+
+    /* print the rounded digit */
+    if (roundDown) {
+        *curDigit = (char)('0' + outputDigit);
+        ++curDigit;
+    }
+    else {
+        /* handle rounding up */
+        if (outputDigit == 9) {
+            /*
+             * find the first non-nine prior digit; the trailing nines that are
+             * walked over are dropped from the output
+             */
+            for (;;) {
+                /* if we are at the first digit */
+                if (curDigit == pOutBuffer) {
+                    /* output 1 at the next highest exponent */
+                    *curDigit = '1';
+                    ++curDigit;
+                    *pOutExponent += 1;
+                    break;
+                }
+
+                --curDigit;
+                if (*curDigit != '9') {
+                    /* increment the digit */
+                    *curDigit += 1;
+                    ++curDigit;
+                    break;
+                }
+            }
+        }
+        else {
+            /* values in the range [0,8] can perform a simple round up */
+            *curDigit = (char)('0' + outputDigit + 1);
+            ++curDigit;
+        }
+    }
+
+    /* return the number of digits output */
+    outputLen = (npy_uint32)(curDigit - pOutBuffer);
+    DEBUG_ASSERT(outputLen <= bufferSize);
+    return outputLen;
+}
+
+/*
+ * Format a value in positional (non-scientific) notation.
+ *
+ * The digits are produced by Dragon4() and then laid out in place inside
+ * buffer: a decimal point is inserted, leading/trailing zeros are added or
+ * trimmed, and optional space padding is applied. Output that does not fit
+ * is truncated; the result is always NUL-terminated and never exceeds
+ * bufferSize bytes including the terminator.
+ *
+ * Arguments:
+ *   buffer, bufferSize - destination and its size in bytes (must be > 0)
+ *   mantissa           - BigInt array handed to Dragon4()
+ *   exponent, mantissaBit, hasUnequalMargins, digit_mode, cutoff_mode
+ *                      - forwarded unchanged to Dragon4()
+ *   signbit            - '+' or '-' to emit that sign character; any other
+ *                        value emits no sign
+ *   precision          - Dragon4() cutoff_max; in non-unique digit modes also
+ *                        the length that TrimMode_None zero-pads up to
+ *   min_digits         - Dragon4() cutoff_min; in DigitMode_Unique also the
+ *                        length that TrimMode_None zero-pads up to
+ *   trim_mode          - controls trailing zeros and the trailing decimal
+ *                        point (None / LeaveOneZero / Zeros / DptZeros)
+ *   digits_left        - pad with spaces on the left until the sign plus the
+ *                        whole-number digits occupy this many characters
+ *   digits_right       - pad with spaces on the right until this many
+ *                        characters follow the decimal point
+ *
+ * Returns the number of characters written, excluding the terminator.
+ */
+static npy_uint32
+FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
+                 npy_int32 exponent, char signbit, npy_uint32 mantissaBit,
+                 npy_bool hasUnequalMargins, DigitMode digit_mode,
+                 CutoffMode cutoff_mode, npy_int32 precision,
+                 npy_int32 min_digits, TrimMode trim_mode,
+                 npy_int32 digits_left, npy_int32 digits_right)
+{
+    npy_int32 printExponent;
+    npy_int32 numDigits, numWholeDigits=0, has_sign=0;
+    npy_int32 add_digits;
+
+    npy_int32 maxPrintLen = (npy_int32)bufferSize - 1, pos = 0;
+
+    /* track the # of digits past the decimal point that have been printed */
+    npy_int32 numFractionDigits = 0, desiredFractionalDigits;
+
+    DEBUG_ASSERT(bufferSize > 0);
+
+    if (digit_mode != DigitMode_Unique) {
+        DEBUG_ASSERT(precision >= 0);
+    }
+
+    if (signbit == '+' && pos < maxPrintLen) {
+        buffer[pos++] = '+';
+        has_sign = 1;
+    }
+    else if (signbit == '-' && pos < maxPrintLen) {
+        buffer[pos++] = '-';
+        has_sign = 1;
+    }
+
+    /* the raw digits land directly after the sign character, if any */
+    numDigits = Dragon4(mantissa, exponent, mantissaBit, hasUnequalMargins,
+                        digit_mode, cutoff_mode, precision, min_digits,
+                        buffer + has_sign, maxPrintLen - has_sign,
+                        &printExponent);
+
+    DEBUG_ASSERT(numDigits > 0);
+    DEBUG_ASSERT(numDigits <= bufferSize);
+
+    /* if output has a whole number */
+    if (printExponent >= 0) {
+        /* leave the whole number at the start of the buffer */
+        numWholeDigits = printExponent+1;
+        if (numDigits <= numWholeDigits) {
+            npy_int32 count = numWholeDigits - numDigits;
+            pos += numDigits;
+
+            /* don't overflow the buffer */
+            if (pos + count > maxPrintLen) {
+                count = maxPrintLen - pos;
+            }
+
+            /* add trailing zeros up to the decimal point */
+            numDigits += count;
+            for ( ; count > 0; count--) {
+                buffer[pos++] = '0';
+            }
+        }
+        /* insert the decimal point prior to the fraction */
+        else if (numDigits > numWholeDigits) {
+            npy_int32 maxFractionDigits;
+
+            numFractionDigits = numDigits - numWholeDigits;
+            maxFractionDigits = maxPrintLen - numWholeDigits - 1 - pos;
+            if (numFractionDigits > maxFractionDigits) {
+                numFractionDigits = maxFractionDigits;
+            }
+
+            memmove(buffer + pos + numWholeDigits + 1,
+                    buffer + pos + numWholeDigits, numFractionDigits);
+            pos += numWholeDigits;
+            buffer[pos] = '.';
+            numDigits = numWholeDigits + 1 + numFractionDigits;
+            pos += 1 + numFractionDigits;
+        }
+    }
+    else {
+        /* shift out the fraction to make room for the leading zeros */
+        npy_int32 numFractionZeros = 0;
+        if (pos + 2 < maxPrintLen) {
+            npy_int32 maxFractionZeros, digitsStartIdx, maxFractionDigits, i;
+
+            maxFractionZeros = maxPrintLen - 2 - pos;
+            numFractionZeros = -(printExponent + 1);
+            if (numFractionZeros > maxFractionZeros) {
+                numFractionZeros = maxFractionZeros;
+            }
+
+            digitsStartIdx = 2 + numFractionZeros;
+
+            /*
+             * shift the significant digits right such that there is room for
+             * leading zeros
+             */
+            numFractionDigits = numDigits;
+            maxFractionDigits = maxPrintLen - digitsStartIdx - pos;
+            if (numFractionDigits > maxFractionDigits) {
+                numFractionDigits = maxFractionDigits;
+            }
+
+            memmove(buffer + pos + digitsStartIdx, buffer + pos,
+                    numFractionDigits);
+
+            /* insert the leading zeros */
+            for (i = 2; i < digitsStartIdx; ++i) {
+                buffer[pos + i] = '0';
+            }
+
+            /* update the counts */
+            numFractionDigits += numFractionZeros;
+            numDigits = numFractionDigits;
+        }
+
+        /* add the decimal point */
+        if (pos + 1 < maxPrintLen) {
+            buffer[pos+1] = '.';
+        }
+
+        /* add the initial zero */
+        if (pos < maxPrintLen) {
+            buffer[pos] = '0';
+            numDigits += 1;
+        }
+        numWholeDigits = 1;
+        pos += 2 + numFractionDigits;
+
+        /*
+         * When the buffer is too small to hold "0." the writes above are
+         * skipped but pos was still advanced by 2; pull it back so that the
+         * terminator is never stored past the end of the buffer.
+         */
+        if (pos > maxPrintLen) {
+            pos = maxPrintLen;
+        }
+    }
+
+    /* always add decimal point, except for DptZeros mode */
+    if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 &&
+            pos < maxPrintLen) {
+        buffer[pos++] = '.';
+    }
+
+    add_digits = digit_mode == DigitMode_Unique ? min_digits : precision;
+    desiredFractionalDigits = add_digits < 0 ? 0 : add_digits;
+    if (cutoff_mode == CutoffMode_TotalLength) {
+        desiredFractionalDigits = add_digits - numWholeDigits;
+    }
+
+    if (trim_mode == TrimMode_LeaveOneZero) {
+        /* if we didn't print any fractional digits, add a trailing 0 */
+        if (numFractionDigits == 0 && pos < maxPrintLen) {
+            buffer[pos++] = '0';
+            numFractionDigits++;
+        }
+    }
+    else if (trim_mode == TrimMode_None &&
+             desiredFractionalDigits > numFractionDigits &&
+             pos < maxPrintLen) {
+        /* add trailing zeros up to add_digits length */
+        /* compute the number of trailing zeros needed */
+        npy_int32 count = desiredFractionalDigits - numFractionDigits;
+
+        /* compare against the remaining room so pos + count cannot overflow */
+        if (count > maxPrintLen - pos) {
+            count = maxPrintLen - pos;
+        }
+        numFractionDigits += count;
+
+        for ( ; count > 0; count--) {
+            buffer[pos++] = '0';
+        }
+    }
+    /* else, for trim_mode Zeros or DptZeros, there is nothing more to add */
+
+    /*
+     * when rounding, we may still end up with trailing zeros. Remove them
+     * depending on trim settings.
+     */
+    if (trim_mode != TrimMode_None && numFractionDigits > 0) {
+        while (buffer[pos-1] == '0') {
+            pos--;
+            numFractionDigits--;
+        }
+        if (buffer[pos-1] == '.') {
+            /* in TrimMode_LeaveOneZero, add trailing 0 back */
+            if (trim_mode == TrimMode_LeaveOneZero){
+                buffer[pos++] = '0';
+                numFractionDigits++;
+            }
+            /* in TrimMode_DptZeros, remove trailing decimal point */
+            else if (trim_mode == TrimMode_DptZeros) {
+                    pos--;
+            }
+        }
+    }
+
+    /* add any whitespace padding to right side */
+    if (digits_right >= numFractionDigits) {
+        npy_int32 count = digits_right - numFractionDigits;
+
+        /* in trim_mode DptZeros, if right padding, add a space for the . */
+        if (trim_mode == TrimMode_DptZeros && numFractionDigits == 0
+                && pos < maxPrintLen) {
+            buffer[pos++] = ' ';
+        }
+
+        /* compare against the remaining room so pos + count cannot overflow */
+        if (count > maxPrintLen - pos) {
+            count = maxPrintLen - pos;
+        }
+
+        for ( ; count > 0; count--) {
+            buffer[pos++] = ' ';
+        }
+    }
+    /* add any whitespace padding to left side */
+    if (digits_left > numWholeDigits + has_sign) {
+        npy_int32 shift = digits_left - (numWholeDigits + has_sign);
+        npy_int32 count = pos;
+
+        /*
+         * The padding itself must fit in the buffer: the fill loop below
+         * writes `shift` spaces, so a shift larger than maxPrintLen would
+         * run past the end of the buffer.
+         */
+        if (shift > maxPrintLen) {
+            shift = maxPrintLen;
+        }
+
+        /* drop trailing characters that no longer fit after shifting */
+        if (count + shift > maxPrintLen) {
+            count = maxPrintLen - shift;
+        }
+
+        if (count > 0) {
+            memmove(buffer + shift, buffer, count);
+        }
+        pos = shift + count;
+        for ( ; shift > 0; shift--) {
+            buffer[shift - 1] = ' ';
+        }
+    }
+
+    /* terminate the buffer */
+    DEBUG_ASSERT(pos <= maxPrintLen);
+    buffer[pos] = '\0';
+
+    return pos;
+}
+
+static npy_uint32
+FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
+                  npy_int32 exponent, char signbit, npy_uint32 mantissaBit,
+                  npy_bool hasUnequalMargins, DigitMode digit_mode,
+                  npy_int32 precision, npy_int32 min_digits, TrimMode trim_mode,
+                  npy_int32 digits_left, npy_int32 exp_digits)
+{
+    npy_int32 printExponent;
+    npy_int32 numDigits;
+    char *pCurOut;
+    npy_int32 numFractionDigits;
+    npy_int32 leftchars;
+    npy_int32 add_digits;
+
+    if (digit_mode != DigitMode_Unique) {
+        DEBUG_ASSERT(precision >= 0);
+    }
+
+    DEBUG_ASSERT(bufferSize > 0);
+
+    pCurOut = buffer;
+
+    /* add any whitespace padding to left side */
+    leftchars = 1 + (signbit == '-' || signbit == '+');
+    if (digits_left > leftchars) {
+        int i;
+        for (i = 0; i < digits_left - leftchars && bufferSize > 1; i++) {
+            *pCurOut = ' ';
+            pCurOut++;
+            --bufferSize;
+        }
+    }
+
+    if (signbit == '+' && bufferSize > 1) {
+        *pCurOut = '+';
+        pCurOut++;
+        --bufferSize;
+    }
+    else if (signbit == '-'  && bufferSize > 1) {
+        *pCurOut = '-';
+        pCurOut++;
+        --bufferSize;
+    }
+
+    numDigits = Dragon4(mantissa, exponent, mantissaBit, hasUnequalMargins,
+                        digit_mode, CutoffMode_TotalLength,
+                        precision < 0 ? -1 : precision + 1,
+                        min_digits < 0 ? -1 : min_digits + 1,
+                        pCurOut, bufferSize, &printExponent);
+
+    DEBUG_ASSERT(numDigits > 0);
+    DEBUG_ASSERT(numDigits <= bufferSize);
+
+    /* keep the whole number as the first digit */
+    if (bufferSize > 1) {
+        pCurOut += 1;
+        bufferSize -= 1;
+    }
+
+    /* insert the decimal point prior to the fractional number */
+    numFractionDigits = numDigits-1;
+    if (numFractionDigits > 0 && bufferSize > 1) {
+        npy_int32 maxFractionDigits = (npy_int32)bufferSize - 2;
+
+        if (numFractionDigits > maxFractionDigits) {
+            numFractionDigits =  maxFractionDigits;
+        }
+
+        memmove(pCurOut + 1, pCurOut, numFractionDigits);
+        pCurOut[0] = '.';
+        pCurOut += (1 + numFractionDigits);
+        bufferSize -= (1 + numFractionDigits);
+    }
+
+    /* always add decimal point, except for DprZeros mode */
+    if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 &&
+            bufferSize > 1) {
+        *pCurOut = '.';
+        ++pCurOut;
+        --bufferSize;
+    }
+
+    add_digits = digit_mode == DigitMode_Unique ? min_digits : precision;
+    add_digits = add_digits < 0 ? 0 : add_digits;
+    if (trim_mode == TrimMode_LeaveOneZero) {
+        /* if we didn't print any fractional digits, add the 0 */
+        if (numFractionDigits == 0 && bufferSize > 1) {
+            *pCurOut = '0';
+            ++pCurOut;
+            --bufferSize;
+            ++numFractionDigits;
+        }
+    }
+    else if (trim_mode == TrimMode_None) {
+        /* add trailing zeros up to add_digits length */
+        if (add_digits > (npy_int32)numFractionDigits) {
+            char *pEnd;
+            /* compute the number of trailing zeros needed */
+            npy_int32 numZeros = (add_digits - numFractionDigits);
+
+            if (numZeros > (npy_int32)bufferSize - 1) {
+                numZeros = (npy_int32)bufferSize - 1;
+            }
+
+            for (pEnd = pCurOut + numZeros; pCurOut < pEnd; ++pCurOut) {
+                *pCurOut = '0';
+                ++numFractionDigits;
+            }
+        }
+    }
+    /* else, for trim_mode Zeros or DptZeros, there is nothing more to add */
+
+    /*
+     * when rounding, we may still end up with trailing zeros. Remove them
+     * depending on trim settings.
+     */
+    if (trim_mode != TrimMode_None && numFractionDigits > 0) {
+        --pCurOut;
+        while (*pCurOut == '0') {
+            --pCurOut;
+            ++bufferSize;
+            --numFractionDigits;
+        }
+        if (trim_mode == TrimMode_LeaveOneZero && *pCurOut == '.') {
+            ++pCurOut;
+            *pCurOut = '0';
+            --bufferSize;
+            ++numFractionDigits;
+        }
+        ++pCurOut;
+    }
+
+    /* print the exponent into a local buffer and copy into output buffer */
+    if (bufferSize > 1) {
+        char exponentBuffer[7];
+        npy_int32 digits[5];
+        npy_int32 i, exp_size, count;
+
+        if (exp_digits > 5) {
+            exp_digits = 5;
+        }
+        if (exp_digits < 0) {
+            exp_digits = 2;
+        }
+
+        exponentBuffer[0] = 'e';
+        if (printExponent >= 0) {
+            exponentBuffer[1] = '+';
+        }
+        else {
+            exponentBuffer[1] = '-';
+            printExponent = -printExponent;
+        }
+
+        DEBUG_ASSERT(printExponent < 100000);
+
+        /* get exp digits */
+        for (i = 0; i < 5; i++) {
+            digits[i] = printExponent % 10;
+            printExponent /= 10;
+        }
+        /* count back over leading zeros */
+        for (i = 5; i > exp_digits && digits[i-1] == 0; i--) {
+        }
+        exp_size = i;
+        /* write remaining digits to tmp buf */
+        for (i = exp_size; i > 0; i--) {
+            exponentBuffer[2 + (exp_size-i)] = (char)('0' + digits[i-1]);
+        }
+
+        /* copy the exponent buffer into the output */
+        count = exp_size + 2;
+        if (count > (npy_int32)bufferSize - 1) {
+            count = (npy_int32)bufferSize - 1;
+        }
+        memcpy(pCurOut, exponentBuffer, count);
+        pCurOut += count;
+        bufferSize -= count;
+    }
+
+
+    DEBUG_ASSERT(bufferSize > 0);
+    pCurOut[0] = '\0';
+
+    return pCurOut - buffer;
+}
+
+static npy_uint32
+Format_floatbits(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
+                 npy_int32 exponent, char signbit, npy_uint32 mantissaBit,
+                 npy_bool hasUnequalMargins, Dragon4_Options *opt)
+{
+    /* Dispatch on opt->scientific and return the chosen formatter's result. */
+    if (!opt->scientific) {
+        /* the positional layout receives cutoff_mode and digits_right */
+        return FormatPositional(buffer, bufferSize, mantissa, exponent,
+                                signbit, mantissaBit, hasUnequalMargins,
+                                opt->digit_mode, opt->cutoff_mode,
+                                opt->precision, opt->min_digits, opt->trim_mode,
+                                opt->digits_left, opt->digits_right);
+    }
+    /* the scientific layout receives exp_digits instead */
+    return FormatScientific(buffer, bufferSize, mantissa, exponent,
+                            signbit, mantissaBit, hasUnequalMargins,
+                            opt->digit_mode, opt->precision,
+                            opt->min_digits, opt->trim_mode,
+                            opt->digits_left, opt->exp_digits);
+}
+
+
+static npy_uint32
+Dragon4_PrintFloat_Sleef_quad(Sleef_quad *value, Dragon4_Options *opt)
+{
+    /* Decode *value as IEEE binary128 and format it into _bigint_static.repr. */
+    char *buffer = _bigint_static.repr;
+    const npy_uint32 bufferSize = sizeof(_bigint_static.repr);
+    BigInt *bigints = _bigint_static.bigints;
+
+    npy_uint32 floatExponent, floatSign;
+    npy_uint64 mantissa_hi, mantissa_lo;
+    npy_int32 exponent;
+    npy_uint32 mantissaBit;
+    npy_bool hasUnequalMargins;
+    char signbit = '\0';
+
+    /* NOTE(review): lo-then-hi word order assumes a little-endian host. */
+    union {
+        Sleef_quad q;
+        struct {
+            npy_uint64 lo;
+            npy_uint64 hi;
+        } i;
+    } u;
+    u.q = *value;
+
+    /* hi word: 1 sign bit, 15 exponent bits, top 48 fraction bits */
+    mantissa_hi = u.i.hi & bitmask_u64(48);
+    mantissa_lo = u.i.lo;
+    floatExponent = (u.i.hi >> 48) & bitmask_u32(15);
+    floatSign = u.i.hi >> 63;
+
+    /* output the sign */
+    if (floatSign != 0) {
+        signbit = '-';
+    }
+    else if (opt->sign) {
+        signbit = '+';
+    }
+
+    /* all-ones exponent: inf/nan; PrintInfNan is told if the fraction != 0 */
+    if (floatExponent == bitmask_u32(15)) {
+        npy_uint64 mantissa_zero = mantissa_hi == 0 && mantissa_lo == 0;
+        return PrintInfNan(buffer, bufferSize, !mantissa_zero, 16, signbit);
+    }
+
+    /* factor the value into its parts */
+    if (floatExponent != 0) {
+        /* normal: check the raw fraction BEFORE OR-ing in the implicit
+         * leading bit; afterwards mantissa_hi can never be 0 */
+        hasUnequalMargins   = (floatExponent != 1) && (mantissa_hi == 0 &&
+                                                       mantissa_lo == 0);
+        mantissa_hi         = (1ull << 48) | mantissa_hi;
+        exponent            = floatExponent - 16383 - 112;
+        mantissaBit         = 112;
+    }
+    else {
+        /* subnormal */
+        exponent            = 1 - 16383 - 112;
+        mantissaBit         = LogBase2_128(mantissa_hi, mantissa_lo);
+        hasUnequalMargins   = NPY_FALSE;
+    }
+
+    BigInt_Set_2x_uint64(&bigints[0], mantissa_hi, mantissa_lo);
+    return Format_floatbits(buffer, bufferSize, bigints, exponent,
+                            signbit, mantissaBit, hasUnequalMargins, opt);
+}
+
+
+PyObject *
+Dragon4_Positional_QuadDType_opt(Sleef_quad *val, Dragon4_Options *opt)
+{
+    /* Format into _bigint_static.repr, then wrap that text as a Python str. */
+    /* NOTE(review): the printer returns npy_uint32, so "< 0" cannot fire. */
+    if (Dragon4_PrintFloat_Sleef_quad(val, opt) < 0) {
+        return NULL;
+    }
+    return PyUnicode_FromString(_bigint_static.repr);
+}
+
+PyObject *
+Dragon4_Positional_QuadDType(Sleef_quad *val, DigitMode digit_mode,
+                   CutoffMode cutoff_mode, int precision, int min_digits,
+                   int sign, TrimMode trim, int pad_left, int pad_right)
+{
+    /* Pack the arguments into an options struct for positional output. */
+    Dragon4_Options opt = {
+        .scientific = 0,
+        .digit_mode = digit_mode,
+        .cutoff_mode = cutoff_mode,
+        .precision = precision,
+        .min_digits = min_digits,
+        .sign = sign,
+        .trim_mode = trim,
+        .digits_left = pad_left,
+        .digits_right = pad_right,
+        .exp_digits = -1, /* not forwarded on the positional path */
+    };
+    return Dragon4_Positional_QuadDType_opt(val, &opt);
+}
+
+PyObject *
+Dragon4_Scientific_QuadDType_opt(Sleef_quad *val, Dragon4_Options *opt)
+{
+    /* Format into _bigint_static.repr, then wrap that text as a Python str. */
+    /* NOTE(review): the printer returns npy_uint32, so "< 0" cannot fire. */
+    if (Dragon4_PrintFloat_Sleef_quad(val, opt) < 0) {
+        return NULL;
+    }
+    return PyUnicode_FromString(_bigint_static.repr);
+}
+
+PyObject *
+Dragon4_Scientific_QuadDType(Sleef_quad *val, DigitMode digit_mode, int precision,
+                   int min_digits, int sign, TrimMode trim, int pad_left, 
+                   int exp_digits)
+{
+    /* Pack the arguments into an options struct for scientific output. */
+    Dragon4_Options opt = {
+        .scientific = 1,
+        .digit_mode = digit_mode,
+        .cutoff_mode = CutoffMode_TotalLength,
+        .precision = precision,
+        .min_digits = min_digits,
+        .sign = sign,
+        .trim_mode = trim,
+        .digits_left = pad_left,
+        .digits_right = -1, /* not forwarded on the scientific path */
+        .exp_digits = exp_digits,
+    };
+    return Dragon4_Scientific_QuadDType_opt(val, &opt);
+}
+
+
+PyObject *
+Dragon4_Positional(PyObject *obj, DigitMode digit_mode, CutoffMode cutoff_mode,
+                   int precision, int min_digits, int sign, TrimMode trim,
+                   int pad_left, int pad_right)
+{
+    /* Only QuadPrecision scalars can be printed; reject anything else. */
+    if (!PyArray_IsScalar(obj, QuadPrecDType)) {
+        PyErr_SetString(PyExc_TypeError, "Dragon4_Positional: expected a QuadPrecision scalar");
+        return NULL;
+    }
+    QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)obj;
+    /* the longdouble backend is converted to Sleef_quad before printing */
+    Sleef_quad quad = (quad_obj->backend == BACKEND_SLEEF)
+                              ? quad_obj->value.sleef_value
+                              : Sleef_cast_from_doubleq1(quad_obj->value.longdouble_value);
+    return Dragon4_Positional_QuadDType(&quad, digit_mode, cutoff_mode, precision, min_digits, sign, trim, pad_left, pad_right);
+}
+
+PyObject *
+Dragon4_Scientific(PyObject *obj, DigitMode digit_mode, int precision,
+                   int min_digits, int sign, TrimMode trim, int pad_left,
+                   int exp_digits)
+{
+    /* Only QuadPrecision scalars can be printed; reject anything else. */
+    if (!PyArray_IsScalar(obj, QuadPrecDType)) {
+        PyErr_SetString(PyExc_TypeError, "Dragon4_Scientific: expected a QuadPrecision scalar");
+        return NULL;
+    }
+    QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)obj;
+
+    /* the longdouble backend is converted to Sleef_quad before printing */
+    Sleef_quad quad = (quad_obj->backend == BACKEND_SLEEF)
+                              ? quad_obj->value.sleef_value
+                              : Sleef_cast_from_doubleq1(quad_obj->value.longdouble_value);
+    return Dragon4_Scientific_QuadDType(&quad, digit_mode, precision, min_digits, sign, trim, pad_left, exp_digits);
+}
\ No newline at end of file
diff --git a/quaddtype/numpy_quaddtype/src/dragon4.h b/quaddtype/numpy_quaddtype/src/dragon4.h
new file mode 100644
index 00000000..1977595e
--- /dev/null
+++ b/quaddtype/numpy_quaddtype/src/dragon4.h
@@ -0,0 +1,70 @@
+#ifndef _QUADDTYPE_DRAGON4_H
+#define _QUADDTYPE_DRAGON4_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <Python.h>
+#include "numpy/arrayobject.h"
+#include <sleef.h>
+#include "quad_common.h"
+
+typedef enum DigitMode
+{
+    /* Round digits to print shortest uniquely identifiable number. */
+    DigitMode_Unique,
+    /* Output the digits of the number as if with infinite precision */
+    DigitMode_Exact,
+} DigitMode;
+
+typedef enum CutoffMode
+{
+    /* up to cutoffNumber significant digits */
+    CutoffMode_TotalLength,
+    /* up to cutoffNumber significant digits past the decimal point */
+    CutoffMode_FractionLength,
+} CutoffMode;
+
+typedef enum TrimMode
+{
+    TrimMode_None,         /* don't trim zeros, always leave a decimal point */
+    TrimMode_LeaveOneZero, /* trim all but the zero before the decimal point */
+    TrimMode_Zeros,        /* trim all trailing zeros, leave decimal point */
+    TrimMode_DptZeros,     /* trim trailing zeros & trailing decimal point */
+} TrimMode;
+
+typedef struct {
+    int scientific;         /* nonzero: FormatScientific; zero: FormatPositional */
+    DigitMode digit_mode;   /* DigitMode_Unique or DigitMode_Exact */
+    CutoffMode cutoff_mode; /* forwarded to the positional formatter only */
+    int precision;          /* scientific path: negative is passed to Dragon4 as -1 */
+    int min_digits;         /* scientific path: negative is passed to Dragon4 as -1 */
+    int sign;               /* nonzero: emit '+' when the sign bit is clear */
+    TrimMode trim_mode;     /* trailing-zero / decimal-point trimming policy */
+    int digits_left;        /* scientific path: left-pads the output with spaces */
+    int digits_right;       /* forwarded to the positional formatter only */
+    int exp_digits;         /* scientific only: capped at 5, negative becomes 2 */
+} Dragon4_Options;
+
+PyObject *Dragon4_Positional_QuadDType(Sleef_quad *val, DigitMode digit_mode,
+                   CutoffMode cutoff_mode, int precision, int min_digits,
+                   int sign, TrimMode trim, int pad_left, int pad_right);
+
+PyObject *Dragon4_Scientific_QuadDType(Sleef_quad *val, DigitMode digit_mode, 
+                   int precision, int min_digits, int sign, TrimMode trim, 
+                   int pad_left, int exp_digits);
+
+PyObject *Dragon4_Positional(PyObject *obj, DigitMode digit_mode, 
+                   CutoffMode cutoff_mode, int precision, int min_digits, 
+                   int sign, TrimMode trim, int pad_left, int pad_right);
+
+PyObject *Dragon4_Scientific(PyObject *obj, DigitMode digit_mode, int precision,
+                   int min_digits, int sign, TrimMode trim, int pad_left,
+                   int exp_digits);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _QUADDTYPE_DRAGON4_H */
\ No newline at end of file
diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c
index 36bca565..4524f3db 100644
--- a/quaddtype/numpy_quaddtype/src/dtype.c
+++ b/quaddtype/numpy_quaddtype/src/dtype.c
@@ -15,6 +15,7 @@
 #include "scalar.h"
 #include "casts.h"
 #include "dtype.h"
+#include "dragon4.h"
 
 static inline int
 quad_load(void *x, char *data_ptr, QuadBackendType backend)
@@ -212,13 +213,22 @@ QuadPrecDType_repr(QuadPrecDTypeObject *self)
     return PyUnicode_FromFormat("QuadPrecDType(backend='%s')", backend_str);
 }
 
+static PyObject *
+QuadPrecDType_str(QuadPrecDTypeObject *self)
+{
+    /* str() yields the same text as repr(), so reuse that formatter. */
+    return QuadPrecDType_repr(self);
+}
+
+
+
 PyArray_DTypeMeta QuadPrecDType = {
         {{
                 PyVarObject_HEAD_INIT(NULL, 0).tp_name = "numpy_quaddtype.QuadPrecDType",
                 .tp_basicsize = sizeof(QuadPrecDTypeObject),
                 .tp_new = QuadPrecDType_new,
                 .tp_repr = (reprfunc)QuadPrecDType_repr,
-                .tp_str = (reprfunc)QuadPrecDType_repr,
+                .tp_str = (reprfunc)QuadPrecDType_str,
         }},
 };
 
diff --git a/quaddtype/numpy_quaddtype/src/quaddtype_main.c b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
index 89ec16f0..7c89d57f 100644
--- a/quaddtype/numpy_quaddtype/src/quaddtype_main.c
+++ b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
@@ -15,6 +15,7 @@
 #include "quad_common.h"
 #include "float.h"
 
+
 static PyObject* py_is_longdouble_128(PyObject* self, PyObject* args) {
     if(sizeof(long double) == 16 && 
         LDBL_MANT_DIG == 113 && 
diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c
index 0b2967e5..7ed731d0 100644
--- a/quaddtype/numpy_quaddtype/src/scalar.c
+++ b/quaddtype/numpy_quaddtype/src/scalar.c
@@ -13,6 +13,7 @@
 
 #include "scalar.h"
 #include "scalar_ops.h"
+#include "dragon4.h"
 
 QuadPrecisionObject *
 QuadPrecision_raw_new(QuadBackendType backend)
@@ -128,6 +129,29 @@ QuadPrecision_new(PyTypeObject *cls, PyObject *args, PyObject *kwargs)
     return (PyObject *)QuadPrecision_from_object(value, backend);
 }
 
+static PyObject *
+QuadPrecision_str_dragon4(QuadPrecisionObject *self)
+{
+    /*
+     * str(): positional Dragon4 output in DigitMode_Unique, with the sign
+     * always requested and TrimMode_LeaveOneZero trimming.
+     */
+    const int precision = SLEEF_QUAD_DIG;
+    const int min_digits = 0;
+    const int show_sign = 1;
+    const int pad_left = 1;
+    const int pad_right = SLEEF_QUAD_DIG;
+
+    /* the longdouble backend is converted to Sleef_quad before printing */
+    Sleef_quad quad = (self->backend == BACKEND_SLEEF)
+                              ? self->value.sleef_value
+                              : Sleef_cast_from_doubleq1(self->value.longdouble_value);
+
+    return Dragon4_Positional_QuadDType(&quad, DigitMode_Unique, CutoffMode_TotalLength,
+                                        precision, min_digits, show_sign,
+                                        TrimMode_LeaveOneZero, pad_left, pad_right);
+}
+
 static PyObject *
 QuadPrecision_str(QuadPrecisionObject *self)
 {
@@ -154,6 +178,39 @@ QuadPrecision_repr(QuadPrecisionObject *self)
     return res;
 }
 
+static PyObject *
+QuadPrecision_repr_dragon4(QuadPrecisionObject *self)
+{
+    /*
+     * repr(): "QuadPrecision('<scientific digits>', backend='<name>')".
+     * The digits come from Dragon4 in DigitMode_Unique with the sign always
+     * requested and at least three exponent digits.
+     */
+    const int is_sleef = (self->backend == BACKEND_SLEEF);
+    const char *backend_str = is_sleef ? "sleef" : "longdouble";
+    const int precision = SLEEF_QUAD_DIG;
+    const int min_digits = 0;
+    const int show_sign = 1;
+    const int pad_left = 1;
+    const int exp_digits = 3;
+    /* the longdouble backend is converted to Sleef_quad before printing */
+    Sleef_quad quad = is_sleef
+                              ? self->value.sleef_value
+                              : Sleef_cast_from_doubleq1(self->value.longdouble_value);
+
+    PyObject *digits = Dragon4_Scientific_QuadDType(&quad, DigitMode_Unique, precision,
+                                                    min_digits, show_sign,
+                                                    TrimMode_LeaveOneZero, pad_left,
+                                                    exp_digits);
+    if (digits == NULL) {
+        return NULL;
+    }
+
+    PyObject *res = PyUnicode_FromFormat("QuadPrecision('%S', backend='%s')", digits, backend_str);
+    Py_DECREF(digits);
+    return res;
+}
+
 static void
 QuadPrecision_dealloc(QuadPrecisionObject *self)
 {
@@ -166,8 +223,8 @@ PyTypeObject QuadPrecision_Type = {
         .tp_itemsize = 0,
         .tp_new = QuadPrecision_new,
         .tp_dealloc = (destructor)QuadPrecision_dealloc,
-        .tp_repr = (reprfunc)QuadPrecision_repr,
-        .tp_str = (reprfunc)QuadPrecision_str,
+        .tp_repr = (reprfunc)QuadPrecision_repr_dragon4,
+        .tp_str = (reprfunc)QuadPrecision_str_dragon4,
         .tp_as_number = &quad_as_scalar,
         .tp_richcompare = (richcmpfunc)quad_richcompare
 
diff --git a/quaddtype/numpy_quaddtype/src/scalar.h b/quaddtype/numpy_quaddtype/src/scalar.h
index 344fef09..4fac1adf 100644
--- a/quaddtype/numpy_quaddtype/src/scalar.h
+++ b/quaddtype/numpy_quaddtype/src/scalar.h
@@ -31,6 +31,9 @@ QuadPrecision_from_object(PyObject *value, QuadBackendType backend);
 int
 init_quadprecision_scalar(void);
 
+#define PyArray_IsScalar(obj, QuadPrecDType) PyObject_TypeCheck(obj, &QuadPrecision_Type)
+#define PyArrayScalar_VAL(obj, QuadPrecDType) (((QuadPrecisionObject *)obj)->value)
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp
index 2444047c..95afed91 100644
--- a/quaddtype/numpy_quaddtype/src/umath.cpp
+++ b/quaddtype/numpy_quaddtype/src/umath.cpp
@@ -232,9 +232,6 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
     QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1];
     QuadBackendType target_backend;
 
-    const char *s1 = (descr_in1->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE";
-    const char *s2 = (descr_in2->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE";
-
     // Determine target backend and if casting is needed
     NPY_CASTING casting = NPY_NO_CASTING;
     if (descr_in1->backend != descr_in2->backend) {
@@ -323,7 +320,6 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
     int nargs = ufunc->nargs;
     PyArray_DTypeMeta *common = NULL;
     bool has_quad = false;
-    QuadBackendType backend = BACKEND_INVALID;  // Initialize to an invalid state
 
     // Handle the special case for reductions
     if (op_dtypes[0] == NULL) {

From 7b43f3ba1c4cddf27d94b2b02c2951484bcb60d2 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Sat, 7 Sep 2024 12:58:45 +0530
Subject: [PATCH 11/32] fixing reduction reorderable issue

---
 quaddtype/meson.build                   | 58 +++++++++++++++++++++++++
 quaddtype/numpy_quaddtype/src/umath.cpp |  2 +-
 2 files changed, 59 insertions(+), 1 deletion(-)
 create mode 100644 quaddtype/meson.build

diff --git a/quaddtype/meson.build b/quaddtype/meson.build
new file mode 100644
index 00000000..fde838c0
--- /dev/null
+++ b/quaddtype/meson.build
@@ -0,0 +1,58 @@
+project('numpy_quaddtype', 'c', 'cpp', default_options : ['cpp_std=c++17', 'b_pie=true'])
+
+py_mod = import('python')
+py = py_mod.find_installation()
+
+c = meson.get_compiler('c')
+
+sleef_dep = c.find_library('sleef')
+sleefquad_dep = c.find_library('sleefquad')
+
+incdir_numpy = run_command(py,
+  [
+    '-c',
+    'import numpy; import os; print(os.path.relpath(numpy.get_include()))'
+  ],
+  check: true
+).stdout().strip()
+
+includes = include_directories(
+    [
+        incdir_numpy,
+        'numpy_quaddtype/src',
+    ]
+)
+
+srcs = [
+    'numpy_quaddtype/src/casts.h',
+    'numpy_quaddtype/src/casts.cpp',
+    'numpy_quaddtype/src/scalar.h',
+    'numpy_quaddtype/src/scalar.c',
+    'numpy_quaddtype/src/dtype.h',
+    'numpy_quaddtype/src/dtype.c',
+    'numpy_quaddtype/src/quaddtype_main.c',
+    'numpy_quaddtype/src/scalar_ops.h',
+    'numpy_quaddtype/src/scalar_ops.cpp',
+    'numpy_quaddtype/src/ops.hpp',
+    'numpy_quaddtype/src/umath.h',
+    'numpy_quaddtype/src/umath.cpp',
+    'numpy_quaddtype/src/dragon4.h',
+    'numpy_quaddtype/src/dragon4.c'
+]
+
+py.install_sources(
+    [
+        'numpy_quaddtype/__init__.py',
+    ],
+    subdir: 'numpy_quaddtype',
+    pure: false
+)
+
+# Flags come from the buildtype; sleef is linked via `dependencies` (no c_args).
+py.extension_module('_quaddtype_main',
+  srcs,
+  dependencies: [sleef_dep, sleefquad_dep],
+  install: true,
+  subdir: 'numpy_quaddtype',
+  include_directories: includes
+)
\ No newline at end of file
diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp
index 95afed91..de2d982d 100644
--- a/quaddtype/numpy_quaddtype/src/umath.cpp
+++ b/quaddtype/numpy_quaddtype/src/umath.cpp
@@ -415,7 +415,7 @@ create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name)
             .nin = 2,
             .nout = 1,
             .casting = NPY_NO_CASTING,
-            .flags = (NPY_ARRAYMETHOD_FLAGS)0,
+            .flags = NPY_METH_IS_REORDERABLE,
             .dtypes = dtypes,
             .slots = slots,
     };

From 04480ce60aed5480dbde68df3e36e856e6b53f4e Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Sat, 7 Sep 2024 18:23:20 +0530
Subject: [PATCH 12/32] np.dot WIP

---
 quaddtype/numpy_quaddtype/src/scalar.c       |   4 +-
 quaddtype/numpy_quaddtype/src/scalar_ops.cpp |  23 ++
 temp.py                                      | 283 +++++++++++++++++++
 3 files changed, 308 insertions(+), 2 deletions(-)
 create mode 100644 temp.py

diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c
index 7ed731d0..f1cbc6d4 100644
--- a/quaddtype/numpy_quaddtype/src/scalar.c
+++ b/quaddtype/numpy_quaddtype/src/scalar.c
@@ -226,12 +226,12 @@ PyTypeObject QuadPrecision_Type = {
         .tp_repr = (reprfunc)QuadPrecision_repr_dragon4,
         .tp_str = (reprfunc)QuadPrecision_str_dragon4,
         .tp_as_number = &quad_as_scalar,
-        .tp_richcompare = (richcmpfunc)quad_richcompare
-
+        .tp_richcompare = (richcmpfunc)quad_richcompare,
 };
 
 int
 init_quadprecision_scalar(void)
 {
+    QuadPrecision_Type.tp_base = &PyFloat_Type; // this is not working (subclassing to np.floating)
     return PyType_Ready(&QuadPrecision_Type);
 }
\ No newline at end of file
diff --git a/quaddtype/numpy_quaddtype/src/scalar_ops.cpp b/quaddtype/numpy_quaddtype/src/scalar_ops.cpp
index 156e4288..2faf810d 100644
--- a/quaddtype/numpy_quaddtype/src/scalar_ops.cpp
+++ b/quaddtype/numpy_quaddtype/src/scalar_ops.cpp
@@ -207,6 +207,27 @@ quad_richcompare(QuadPrecisionObject *self, PyObject *other, int cmp_op)
     return PyBool_FromLong(cmp);
 }
 
+static PyObject *
+QuadPrecision_float(QuadPrecisionObject *self)
+{
+    /* nb_float slot: narrow the stored value to a C double and box it. */
+    const double narrowed = (self->backend == BACKEND_SLEEF)
+                                    ? Sleef_cast_to_doubleq1(self->value.sleef_value)
+                                    : (double)self->value.longdouble_value;
+    return PyFloat_FromDouble(narrowed);
+}
+
+static PyObject *
+QuadPrecision_int(QuadPrecisionObject *self)
+{
+    /* nb_int slot. NOTE(review): no range/inf/nan check before the cast. */
+    const long long truncated = (self->backend == BACKEND_SLEEF)
+            ? Sleef_cast_to_int64q1(self->value.sleef_value)
+            : (long long)self->value.longdouble_value;
+    return PyLong_FromLongLong(truncated);
+}
+
+
 PyNumberMethods quad_as_scalar = {
         .nb_add = (binaryfunc)quad_binary_func<quad_add, ld_add>,
         .nb_subtract = (binaryfunc)quad_binary_func<quad_sub, ld_sub>,
@@ -216,5 +237,7 @@ PyNumberMethods quad_as_scalar = {
         .nb_positive = (unaryfunc)quad_unary_func<quad_positive, ld_positive>,
         .nb_absolute = (unaryfunc)quad_unary_func<quad_absolute, ld_absolute>,
         .nb_bool = (inquiry)quad_nonzero,
+        .nb_int = (unaryfunc)QuadPrecision_int,
+        .nb_float = (unaryfunc)QuadPrecision_float,
         .nb_true_divide = (binaryfunc)quad_binary_func<quad_div, ld_div>,
 };
\ No newline at end of file
diff --git a/temp.py b/temp.py
new file mode 100644
index 00000000..3ef85e95
--- /dev/null
+++ b/temp.py
@@ -0,0 +1,283 @@
+import numpy_quaddtype as npq
+import numpy as np
+
+
+def test_scalar_ops(backend):
+    print(f"\nTesting scalar operations for {backend} backend:")
+
+    # Create QuadPrecision instances
+    q1 = npq.QuadPrecision(
+        "3.14159265358979323846264338327950288", backend=backend)
+    q2 = npq.QuadPrecision(
+        "-2.71828182845904523536028747135266250", backend=backend)
+
+    # Test unary operations
+    print("\nUnary operations:")
+    print(f"  Negation of q1: {-q1}")
+    print(f"  Absolute value of q2: {abs(q2)}")
+
+    # Test binary operations
+    print("\nBinary operations:")
+    print(f"  Addition: {q1 + q2}")
+    print(f"  Subtraction: {q1 - q2}")
+    print(f"  Multiplication: {q1 * q2}")
+    print(f"  Division: {q1 / q2}")
+
+    # Test comparison operations
+    print("\nComparison operations:")
+    print(f"  q1 == q2: {q1 == q2}")
+    print(f"  q1 != q2: {q1 != q2}")
+    print(f"  q1 < q2: {q1 < q2}")
+    print(f"  q1 <= q2: {q1 <= q2}")
+    print(f"  q1 > q2: {q1 > q2}")
+    print(f"  q1 >= q2: {q1 >= q2}")
+
+    # Test operations with Python numbers
+    print("\nOperations with Python numbers:")
+    print(f"  q1 + 1: {q1 + 1}")
+    print(f"  q1 - 2.5: {q1 - 2.5}")
+    print(f"  q1 * 3: {q1 * 3}")
+    print(f"  q1 / 2: {q1 / 2}")
+
+    # Test boolean conversion
+    print("\nBoolean conversion:")
+    print(f"  bool(q1): {np.bool(q1)}")
+    print(
+        f"  bool(npq.QuadPrecision('0', backend=backend)): {np.bool(npq.QuadPrecision('0', backend=backend))}")
+
+
+def test_casting(backend):
+    print(f"\nTesting {backend} backend:")
+
+    # Create QuadPrecision instances
+    q1 = npq.QuadPrecision(
+        "3.14159265358979323846264338327950288", backend=backend)
+    q2 = npq.QuadPrecision(
+        "-2.71828182845904523536028747135266250", backend=backend)
+
+    # Test casting from QuadPrecision to numpy dtypes
+    print("Casting from QuadPrecision to numpy dtypes:")
+    print(f"  float32: {np.float32(q1)}")
+    print(f"  float64: {np.float64(q1)}")
+    print(f"  int64: {np.int64(q1)}")
+    print(f"  uint64: {np.uint64(q1)}")
+
+    # Test casting from numpy dtypes to QuadPrecision
+    print("\nCasting from numpy dtypes to QuadPrecision:")
+    print(
+        f"  float32: {np.float32(3.14159).astype(npq.QuadPrecDType(backend=backend))}")
+    print(
+        f"  float64: {np.float64(2.71828182845904).astype(npq.QuadPrecDType(backend=backend))}")
+    print(
+        f"  int64: {np.int64(-1234567890).astype(npq.QuadPrecDType(backend=backend))}")
+    print(
+        f"  uint64: {np.uint64(9876543210).astype(npq.QuadPrecDType(backend=backend))}")
+
+    # Test array operations
+    print("\nArray operations:")
+    q_array = np.array([q1, q2], dtype=npq.QuadPrecDType(backend=backend))
+    print(f"  QuadPrecision array: {q_array}")
+
+    np_array = np.array([3.14, -2.71, 1.41, -1.73], dtype=np.float64)
+    q_from_np = np_array.astype(npq.QuadPrecDType(backend=backend))
+    print(f"  Numpy to QuadPrecision: {q_from_np}")
+
+    back_to_np = np.array(q_from_np, dtype=np.float64)
+    print(f"  QuadPrecision to Numpy: {back_to_np}")
+
+    # Test precision maintenance
+    large_int = 12345678901234567890
+    q_large = np.array([large_int], dtype=np.uint64).astype(
+        npq.QuadPrecDType(backend=backend))[0]
+    print(f"\nPrecision test:")
+    print(f"  Original large int: {large_int}")
+    print(f"  QuadPrecision: {q_large}")
+    print(f"  Back to int: {np.uint64(q_large)}")
+
+    # Test edge cases
+
+
+def test_edge_cases(backend):
+    print(f"\nTesting negative numbers for {backend} backend:")
+
+    # Test various negative numbers
+    test_values = [
+        -1.0,
+        -1e10,
+        -1e100,
+        -1e300,
+        np.nextafter(np.finfo(np.float64).min, 0),
+        np.finfo(np.float64).min
+    ]
+
+    for value in test_values:
+        q_value = npq.QuadPrecision(str(value), backend=backend)
+        print(f"  Original: {value}")
+        print(f"  QuadPrecision: {q_value}")
+        print(f"  Back to float64: {np.float64(q_value)}")
+        print()
+
+    # Test value beyond float64 precision
+    beyond_float64_precision = "1.7976931348623157081452742373170435e+308"
+    q_beyond = npq.QuadPrecision(beyond_float64_precision, backend=backend)
+    print(f"  Beyond float64 precision: {q_beyond}")
+    q_float64_max = npq.QuadPrecision(
+        str(np.finfo(np.float64).max), backend=backend)
+    diff = q_beyond - q_float64_max
+    print(f"  Difference from float64 max: {diff}")
+    print(
+        f"  Difference is positive: {diff > npq.QuadPrecision('0', backend=backend)}")
+
+    # Test epsilon (smallest representable difference between two numbers)
+    q_epsilon = npq.QuadPrecision(
+        str(np.finfo(np.float64).eps), backend=backend)
+    print(f"  Float64 epsilon in QuadPrecision: {q_epsilon}")
+    q_one = npq.QuadPrecision("1", backend=backend)
+    q_one_plus_epsilon = q_one + q_epsilon
+    print(f"  1 + epsilon != 1: {q_one_plus_epsilon != q_one}")
+    print(f"  (1 + epsilon) - 1: {q_one_plus_epsilon - q_one}")
+
+
+def test_ufuncs(backend):
+    print(f"\nTesting ufuncs for {backend} backend:")
+
+    # Create QuadPrecision arrays
+    q_array1 = np.array([1, 2, 3], dtype=npq.QuadPrecDType(backend=backend))
+    q_array2 = np.array([1, 2, 3], dtype=npq.QuadPrecDType(backend=backend))
+
+    # Test unary ufuncs
+    print("\nUnary unfuncs:")
+    print(f"  negative: {np.negative(q_array1)}")
+    print(f"  absolute: {np.absolute(q_array1)}")
+    print(f"  rint: {np.rint(q_array1)}")
+    print(f"  floor: {np.floor(q_array1)}")
+    print(f"  ceil: {np.ceil(q_array1)}")
+    print(f"  trunc: {np.trunc(q_array1)}")
+    print(f"  sqrt: {np.sqrt(q_array1)}")
+    print(f"  square: {np.square(q_array1)}")
+    print(f"  log: {np.log(q_array1)}")
+    print(f"  log2: {np.log2(q_array1)}")
+    print(f"  log10: {np.log10(q_array1)}")
+    print(f"  exp: {np.exp(q_array1)}")
+    print(f"  exp2: {np.exp2(q_array1)}")
+
+    # Test binary ufuncs
+    print("\nBinary ufuncs:")
+    print(f"  add: {np.add(q_array1, q_array2)}")
+    print(f"  subtract: {np.subtract(q_array1, q_array2)}")
+    print(f"  multiply: {np.multiply(q_array1, q_array2)}")
+    print(f"  divide: {np.divide(q_array1, q_array2)}")
+    print(f"  power: {np.power(q_array1, q_array2)}")
+    print(f"  mod: {np.mod(q_array1, q_array2)}")
+    print(f"  minimum: {np.minimum(q_array1, q_array2)}")
+    print(f"  maximum: {np.maximum(q_array1, q_array2)}")
+
+    # Test comparison ufuncs
+    print("\nComparison ufuncs:")
+    print(f"  equal: {np.equal(q_array1, q_array2)}")
+    print(f"  not_equal: {np.not_equal(q_array1, q_array2)}")
+    print(f"  less: {np.less(q_array1, q_array2)}")
+    print(f"  less_equal: {np.less_equal(q_array1, q_array2)}")
+    print(f"  greater: {np.greater(q_array1, q_array2)}")
+    print(f"  greater_equal: {np.greater_equal(q_array1, q_array2)}")
+
+    # Test mixed operations with numpy arrays
+    print(f"Testing backend: {backend}")
+    print("\nMixed operations with numpy arrays:")
+    np_array = np.array([1.0, 2.0, 3.0], dtype=np.float64)
+    print(f"  add: {np.add(q_array1, np_array)}")
+    print(f"  multiply: {np.multiply(q_array1, np_array)}")
+    print(f"  divide: {np.divide(q_array1, np_array)}")
+
+    # Test reduction operations
+    print("\nReduction operations:")
+    print(f"  sum: {np.sum(q_array1)}")
+    print(f"  prod: {np.prod(q_array1)}")
+    print(f"  min: {np.min(q_array1)}")
+    print(f"  max: {np.max(q_array1)}")
+
+from numpy_quaddtype import QuadPrecision, QuadPrecDType
+
+def test_quad_precision():
+    print("Testing QuadPrecision scalar:")
+    
+    # Test different initializations
+    values = [
+        0,
+        1,
+        -1,
+        3.14159265358979323846,
+        1e100,
+        1e-100,
+        float('inf'),
+        float('-inf'),
+        float('nan')
+    ]
+
+    for val in values:
+        q = QuadPrecision(val)
+        print(f"Value: {val}")
+        print(f"  str: {str(q)}")
+        print(f"  repr: {repr(q)}")
+
+    # Test different backends
+    print("\nTesting backends:")
+    q_sleef = QuadPrecision(3.14159265358979323846, backend='sleef')
+    q_longdouble = QuadPrecision(3.14159265358979323846, backend='longdouble')
+    print(f"Sleef:      {q_sleef}")
+    print(f"Long double: {q_longdouble}")
+
+def test_quad_dtype():
+    print("\nTesting QuadPrecDType:")
+
+    # Create an array with QuadPrecDType
+    arr = np.array([0, 1, -1, 3.14159265358979323846, 1e100, 1e-100], 
+                   dtype=QuadPrecDType())
+    
+    print("Array elements:")
+    for elem in arr:
+        print(f"  {elem}")
+
+    print("\nFull array:")
+    print(arr)
+
+    # Test different backends
+    print("\nTesting backends in arrays:")
+    arr_sleef = np.array([3.14159265358979323846], dtype=QuadPrecDType(backend='sleef'))
+    arr_longdouble = np.array([3.14159265358979323846], dtype=QuadPrecDType(backend='longdouble'))
+    print(f"Sleef array:       {arr_sleef}")
+    print(f"Long double array: {arr_longdouble}")
+
+def test_operations():
+    print("\nTesting basic operations:")
+    a = QuadPrecision(3.14159265358979323846)
+    b = QuadPrecision(2.71828182845904523536)
+    
+    print(f"a = {a}")
+    print(f"b = {b}")
+    print(f"a + b = {a + b}")
+    print(f"a - b = {a - b}")
+    print(f"a * b = {a * b}")
+    print(f"a / b = {a / b}")
+
+def test():
+    """Run the per-backend demos for each backend, then the rest once."""
+    for backend in ['sleef', 'longdouble']:
+        test_scalar_ops(backend)
+        test_casting(backend)
+        test_edge_cases(backend)
+        test_ufuncs(backend)
+        print("*"*50)
+    # These take no backend argument, so they run once, outside the loop.
+    test_quad_precision()
+    test_quad_dtype()
+    test_operations()
+    print("All tests completed successfully")
+
+def dot(a, b):
+    """Return ``np.dot(a, b)``."""
+    return np.dot(a, b)
+
+if __name__ == "__main__":
+    a = np.array([1, 2, 3], dtype=QuadPrecDType())
+    print(dot(a, a))
\ No newline at end of file

From 755e2cdc141531378e4aae3feb78a5c23ee4c9ae Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Tue, 10 Sep 2024 08:30:15 +0530
Subject: [PATCH 13/32] fixing base

---
 quaddtype/numpy_quaddtype/src/dragon4.c | 724 +++++++++++-------------
 quaddtype/numpy_quaddtype/src/dtype.c   |  16 +-
 2 files changed, 348 insertions(+), 392 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/src/dragon4.c b/quaddtype/numpy_quaddtype/src/dragon4.c
index 313ff312..1ac9420b 100644
--- a/quaddtype/numpy_quaddtype/src/dragon4.c
+++ b/quaddtype/numpy_quaddtype/src/dragon4.c
@@ -20,10 +20,12 @@
 #if 0
 #define DEBUG_ASSERT(stmnt) assert(stmnt)
 #else
-#define DEBUG_ASSERT(stmnt) do {} while(0)
+#define DEBUG_ASSERT(stmnt) \
+    do {                    \
+    } while (0)
 #endif
 
-#define c_BigInt_MaxBlocks  1023
+#define c_BigInt_MaxBlocks 1023
 #define BIGINT_DRAGON4_GROUPSIZE 7
 
 typedef struct BigInt {
@@ -59,7 +61,7 @@ BigInt_Multiply10(BigInt *result)
 
     npy_uint32 *cur = result->blocks;
     npy_uint32 *end = result->blocks + result->length;
-    for ( ; cur != end; ++cur) {
+    for (; cur != end; ++cur) {
         npy_uint64 product = (npy_uint64)(*cur) * 10ull + carry;
         (*cur) = (npy_uint32)(product & bitmask_u64(32));
         carry = product >> 32;
@@ -73,16 +75,15 @@ BigInt_Multiply10(BigInt *result)
     }
 }
 
-static npy_uint32 g_PowerOf10_U32[] =
-{
-    1,          /* 10 ^ 0 */
-    10,         /* 10 ^ 1 */
-    100,        /* 10 ^ 2 */
-    1000,       /* 10 ^ 3 */
-    10000,      /* 10 ^ 4 */
-    100000,     /* 10 ^ 5 */
-    1000000,    /* 10 ^ 6 */
-    10000000,   /* 10 ^ 7 */
+static npy_uint32 g_PowerOf10_U32[] = {
+        1,        /* 10 ^ 0 */
+        10,       /* 10 ^ 1 */
+        100,      /* 10 ^ 2 */
+        1000,     /* 10 ^ 3 */
+        10000,    /* 10 ^ 4 */
+        100000,   /* 10 ^ 5 */
+        1000000,  /* 10 ^ 6 */
+        10000000, /* 10 ^ 7 */
 };
 
 /*
@@ -92,194 +93,188 @@ static npy_uint32 g_PowerOf10_U32[] =
  *       the BigInt struct which would allow us to store a minimal amount of
  *       data here.
  */
-static BigInt g_PowerOf10_Big[] =
-{
-    /* 10 ^ 8 */
-    { 1, { 100000000 } },
-    /* 10 ^ 16 */
-    { 2, { 0x6fc10000, 0x002386f2 } },
-    /* 10 ^ 32 */
-    { 4, { 0x00000000, 0x85acef81, 0x2d6d415b, 0x000004ee, } },
-    /* 10 ^ 64 */
-    { 7, { 0x00000000, 0x00000000, 0xbf6a1f01, 0x6e38ed64, 0xdaa797ed,
-           0xe93ff9f4, 0x00184f03, } },
-    /* 10 ^ 128 */
-    { 14, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2e953e01,
-            0x03df9909, 0x0f1538fd, 0x2374e42f, 0xd3cff5ec, 0xc404dc08,
-            0xbccdb0da, 0xa6337f19, 0xe91f2603, 0x0000024e, } },
-    /* 10 ^ 256 */
-    { 27, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-            0x00000000, 0x00000000, 0x00000000, 0x982e7c01, 0xbed3875b,
-            0xd8d99f72, 0x12152f87, 0x6bde50c6, 0xcf4a6e70, 0xd595d80f,
-            0x26b2716e, 0xadc666b0, 0x1d153624, 0x3c42d35a, 0x63ff540e,
-            0xcc5573c0, 0x65f9ef17, 0x55bc28f2, 0x80dcc7f7, 0xf46eeddc,
-            0x5fdcefce, 0x000553f7, } },
-    /* 10 ^ 512 */
-    { 54, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-            0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-            0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-            0x00000000, 0xfc6cf801, 0x77f27267, 0x8f9546dc, 0x5d96976f,
-            0xb83a8a97, 0xc31e1ad9, 0x46c40513, 0x94e65747, 0xc88976c1,
-            0x4475b579, 0x28f8733b, 0xaa1da1bf, 0x703ed321, 0x1e25cfea,
-            0xb21a2f22, 0xbc51fb2e, 0x96e14f5d, 0xbfa3edac, 0x329c57ae,
-            0xe7fc7153, 0xc3fc0695, 0x85a91924, 0xf95f635e, 0xb2908ee0,
-            0x93abade4, 0x1366732a, 0x9449775c, 0x69be5b0e, 0x7343afac,
-            0xb099bc81, 0x45a71d46, 0xa2699748, 0x8cb07303, 0x8a0b1f13,
-            0x8cab8a97, 0xc1d238d9, 0x633415d4, 0x0000001c, } },
-    /* 10 ^ 1024 */
-    { 107, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x2919f001, 0xf55b2b72, 0x6e7c215b,
-             0x1ec29f86, 0x991c4e87, 0x15c51a88, 0x140ac535, 0x4c7d1e1a,
-             0xcc2cd819, 0x0ed1440e, 0x896634ee, 0x7de16cfb, 0x1e43f61f,
-             0x9fce837d, 0x231d2b9c, 0x233e55c7, 0x65dc60d7, 0xf451218b,
-             0x1c5cd134, 0xc9635986, 0x922bbb9f, 0xa7e89431, 0x9f9f2a07,
-             0x62be695a, 0x8e1042c4, 0x045b7a74, 0x1abe1de3, 0x8ad822a5,
-             0xba34c411, 0xd814b505, 0xbf3fdeb3, 0x8fc51a16, 0xb1b896bc,
-             0xf56deeec, 0x31fb6bfd, 0xb6f4654b, 0x101a3616, 0x6b7595fb,
-             0xdc1a47fe, 0x80d98089, 0x80bda5a5, 0x9a202882, 0x31eb0f66,
-             0xfc8f1f90, 0x976a3310, 0xe26a7b7e, 0xdf68368a, 0x3ce3a0b8,
-             0x8e4262ce, 0x75a351a2, 0x6cb0b6c9, 0x44597583, 0x31b5653f,
-             0xc356e38a, 0x35faaba6, 0x0190fba0, 0x9fc4ed52, 0x88bc491b,
-             0x1640114a, 0x005b8041, 0xf4f3235e, 0x1e8d4649, 0x36a8de06,
-             0x73c55349, 0xa7e6bd2a, 0xc1a6970c, 0x47187094, 0xd2db49ef,
-             0x926c3f5b, 0xae6209d4, 0x2d433949, 0x34f4a3c6, 0xd4305d94,
-             0xd9d61a05, 0x00000325, } },
-    /* 10 ^ 2048 */
-    { 213, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1333e001,
-             0xe3096865, 0xb27d4d3f, 0x49e28dcf, 0xec2e4721, 0xee87e354,
-             0xb6067584, 0x368b8abb, 0xa5e5a191, 0x2ed56d55, 0xfd827773,
-             0xea50d142, 0x51b78db2, 0x98342c9e, 0xc850dabc, 0x866ed6f1,
-             0x19342c12, 0x92794987, 0xd2f869c2, 0x66912e4a, 0x71c7fd8f,
-             0x57a7842d, 0x235552eb, 0xfb7fedcc, 0xf3861ce0, 0x38209ce1,
-             0x9713b449, 0x34c10134, 0x8c6c54de, 0xa7a8289c, 0x2dbb6643,
-             0xe3cb64f3, 0x8074ff01, 0xe3892ee9, 0x10c17f94, 0xa8f16f92,
-             0xa8281ed6, 0x967abbb3, 0x5a151440, 0x9952fbed, 0x13b41e44,
-             0xafe609c3, 0xa2bca416, 0xf111821f, 0xfb1264b4, 0x91bac974,
-             0xd6c7d6ab, 0x8e48ff35, 0x4419bd43, 0xc4a65665, 0x685e5510,
-             0x33554c36, 0xab498697, 0x0dbd21fe, 0x3cfe491d, 0x982da466,
-             0xcbea4ca7, 0x9e110c7b, 0x79c56b8a, 0x5fc5a047, 0x84d80e2e,
-             0x1aa9f444, 0x730f203c, 0x6a57b1ab, 0xd752f7a6, 0x87a7dc62,
-             0x944545ff, 0x40660460, 0x77c1a42f, 0xc9ac375d, 0xe866d7ef,
-             0x744695f0, 0x81428c85, 0xa1fc6b96, 0xd7917c7b, 0x7bf03c19,
-             0x5b33eb41, 0x5715f791, 0x8f6cae5f, 0xdb0708fd, 0xb125ac8e,
-             0x785ce6b7, 0x56c6815b, 0x6f46eadb, 0x4eeebeee, 0x195355d8,
-             0xa244de3c, 0x9d7389c0, 0x53761abd, 0xcf99d019, 0xde9ec24b,
-             0x0d76ce39, 0x70beb181, 0x2e55ecee, 0xd5f86079, 0xf56d9d4b,
-             0xfb8886fb, 0x13ef5a83, 0x408f43c5, 0x3f3389a4, 0xfad37943,
-             0x58ccf45c, 0xf82df846, 0x415c7f3e, 0x2915e818, 0x8b3d5cf4,
-             0x6a445f27, 0xf8dbb57a, 0xca8f0070, 0x8ad803ec, 0xb2e87c34,
-             0x038f9245, 0xbedd8a6c, 0xc7c9dee0, 0x0eac7d56, 0x2ad3fa14,
-             0xe0de0840, 0xf775677c, 0xf1bd0ad5, 0x92be221e, 0x87fa1fb9,
-             0xce9d04a4, 0xd2c36fa9, 0x3f6f7024, 0xb028af62, 0x907855ee,
-             0xd83e49d6, 0x4efac5dc, 0xe7151aab, 0x77cd8c6b, 0x0a753b7d,
-             0x0af908b4, 0x8c983623, 0xe50f3027, 0x94222771, 0x1d08e2d6,
-             0xf7e928e6, 0xf2ee5ca6, 0x1b61b93c, 0x11eb962b, 0x9648b21c,
-             0xce2bcba1, 0x34f77154, 0x7bbebe30, 0xe526a319, 0x8ce329ac,
-             0xde4a74d2, 0xb5dc53d5, 0x0009e8b3, } },
-    /* 10 ^ 4096 */
-    { 426, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-             0x00000000, 0x00000000, 0x00000000, 0x2a67c001, 0xd4724e8d,
-             0x8efe7ae7, 0xf89a1e90, 0xef084117, 0x54e05154, 0x13b1bb51,
-             0x506be829, 0xfb29b172, 0xe599574e, 0xf0da6146, 0x806c0ed3,
-             0xb86ae5be, 0x45155e93, 0xc0591cc2, 0x7e1e7c34, 0x7c4823da,
-             0x1d1f4cce, 0x9b8ba1e8, 0xd6bfdf75, 0xe341be10, 0xc2dfae78,
-             0x016b67b2, 0x0f237f1a, 0x3dbeabcd, 0xaf6a2574, 0xcab3e6d7,
-             0x142e0e80, 0x61959127, 0x2c234811, 0x87009701, 0xcb4bf982,
-             0xf8169c84, 0x88052f8c, 0x68dde6d4, 0xbc131761, 0xff0b0905,
-             0x54ab9c41, 0x7613b224, 0x1a1c304e, 0x3bfe167b, 0x441c2d47,
-             0x4f6cea9c, 0x78f06181, 0xeb659fb8, 0x30c7ae41, 0x947e0d0e,
-             0xa1ebcad7, 0xd97d9556, 0x2130504d, 0x1a8309cb, 0xf2acd507,
-             0x3f8ec72a, 0xfd82373a, 0x95a842bc, 0x280f4d32, 0xf3618ac0,
-             0x811a4f04, 0x6dc3a5b4, 0xd3967a1b, 0x15b8c898, 0xdcfe388f,
-             0x454eb2a0, 0x8738b909, 0x10c4e996, 0x2bd9cc11, 0x3297cd0c,
-             0x655fec30, 0xae0725b1, 0xf4090ee8, 0x037d19ee, 0x398c6fed,
-             0x3b9af26b, 0xc994a450, 0xb5341743, 0x75a697b2, 0xac50b9c1,
-             0x3ccb5b92, 0xffe06205, 0xa8329761, 0xdfea5242, 0xeb83cadb,
-             0xe79dadf7, 0x3c20ee69, 0x1e0a6817, 0x7021b97a, 0x743074fa,
-             0x176ca776, 0x77fb8af6, 0xeca19beb, 0x92baf1de, 0xaf63b712,
-             0xde35c88b, 0xa4eb8f8c, 0xe137d5e9, 0x40b464a0, 0x87d1cde8,
-             0x42923bbd, 0xcd8f62ff, 0x2e2690f3, 0x095edc16, 0x59c89f1b,
-             0x1fa8fd5d, 0x5138753d, 0x390a2b29, 0x80152f18, 0x2dd8d925,
-             0xf984d83e, 0x7a872e74, 0xc19e1faf, 0xed4d542d, 0xecf9b5d0,
-             0x9462ea75, 0xc53c0adf, 0x0caea134, 0x37a2d439, 0xc8fa2e8a,
-             0x2181327e, 0x6e7bb827, 0x2d240820, 0x50be10e0, 0x5893d4b8,
-             0xab312bb9, 0x1f2b2322, 0x440b3f25, 0xbf627ede, 0x72dac789,
-             0xb608b895, 0x78787e2a, 0x86deb3f0, 0x6fee7aab, 0xbb9373f4,
-             0x27ecf57b, 0xf7d8b57e, 0xfca26a9f, 0x3d04e8d2, 0xc9df13cb,
-             0x3172826a, 0xcd9e8d7c, 0xa8fcd8e0, 0xb2c39497, 0x307641d9,
-             0x1cc939c1, 0x2608c4cf, 0xb6d1c7bf, 0x3d326a7e, 0xeeaf19e6,
-             0x8e13e25f, 0xee63302b, 0x2dfe6d97, 0x25971d58, 0xe41d3cc4,
-             0x0a80627c, 0xab8db59a, 0x9eea37c8, 0xe90afb77, 0x90ca19cf,
-             0x9ee3352c, 0x3613c850, 0xfe78d682, 0x788f6e50, 0x5b060904,
-             0xb71bd1a4, 0x3fecb534, 0xb32c450c, 0x20c33857, 0xa6e9cfda,
-             0x0239f4ce, 0x48497187, 0xa19adb95, 0xb492ed8a, 0x95aca6a8,
-             0x4dcd6cd9, 0xcf1b2350, 0xfbe8b12a, 0x1a67778c, 0x38eb3acc,
-             0xc32da383, 0xfb126ab1, 0xa03f40a8, 0xed5bf546, 0xe9ce4724,
-             0x4c4a74fd, 0x73a130d8, 0xd9960e2d, 0xa2ebd6c1, 0x94ab6feb,
-             0x6f233b7c, 0x49126080, 0x8e7b9a73, 0x4b8c9091, 0xd298f999,
-             0x35e836b5, 0xa96ddeff, 0x96119b31, 0x6b0dd9bc, 0xc6cc3f8d,
-             0x282566fb, 0x72b882e7, 0xd6769f3b, 0xa674343d, 0x00fc509b,
-             0xdcbf7789, 0xd6266a3f, 0xae9641fd, 0x4e89541b, 0x11953407,
-             0x53400d03, 0x8e0dd75a, 0xe5b53345, 0x108f19ad, 0x108b89bc,
-             0x41a4c954, 0xe03b2b63, 0x437b3d7f, 0x97aced8e, 0xcbd66670,
-             0x2c5508c2, 0x650ebc69, 0x5c4f2ef0, 0x904ff6bf, 0x9985a2df,
-             0x9faddd9e, 0x5ed8d239, 0x25585832, 0xe3e51cb9, 0x0ff4f1d4,
-             0x56c02d9a, 0x8c4ef804, 0xc1a08a13, 0x13fd01c8, 0xe6d27671,
-             0xa7c234f4, 0x9d0176cc, 0xd0d73df2, 0x4d8bfa89, 0x544f10cd,
-             0x2b17e0b2, 0xb70a5c7d, 0xfd86fe49, 0xdf373f41, 0x214495bb,
-             0x84e857fd, 0x00d313d5, 0x0496fcbe, 0xa4ba4744, 0xe8cac982,
-             0xaec29e6e, 0x87ec7038, 0x7000a519, 0xaeee333b, 0xff66e42c,
-             0x8afd6b25, 0x03b4f63b, 0xbd7991dc, 0x5ab8d9c7, 0x2ed4684e,
-             0x48741a6c, 0xaf06940d, 0x2fdc6349, 0xb03d7ecd, 0xe974996f,
-             0xac7867f9, 0x52ec8721, 0xbcdd9d4a, 0x8edd2d00, 0x3557de06,
-             0x41c759f8, 0x3956d4b9, 0xa75409f2, 0x123cd8a1, 0xb6100fab,
-             0x3e7b21e2, 0x2e8d623b, 0x92959da2, 0xbca35f77, 0x200c03a5,
-             0x35fcb457, 0x1bb6c6e4, 0xf74eb928, 0x3d5d0b54, 0x87cc1d21,
-             0x4964046f, 0x18ae4240, 0xd868b275, 0x8bd2b496, 0x1c5563f4,
-             0xc234d8f5, 0xf868e970, 0xf9151fff, 0xae7be4a2, 0x271133ee,
-             0xbb0fd922, 0x25254932, 0xa60a9fc0, 0x104bcd64, 0x30290145,
-             0x00000062, } },
+static BigInt g_PowerOf10_Big[] = {
+        /* 10 ^ 8 */
+        {1, {100000000}},
+        /* 10 ^ 16 */
+        {2, {0x6fc10000, 0x002386f2}},
+        /* 10 ^ 32 */
+        {4,
+         {
+                 0x00000000,
+                 0x85acef81,
+                 0x2d6d415b,
+                 0x000004ee,
+         }},
+        /* 10 ^ 64 */
+        {7,
+         {
+                 0x00000000,
+                 0x00000000,
+                 0xbf6a1f01,
+                 0x6e38ed64,
+                 0xdaa797ed,
+                 0xe93ff9f4,
+                 0x00184f03,
+         }},
+        /* 10 ^ 128 */
+        {14,
+         {
+                 0x00000000,
+                 0x00000000,
+                 0x00000000,
+                 0x00000000,
+                 0x2e953e01,
+                 0x03df9909,
+                 0x0f1538fd,
+                 0x2374e42f,
+                 0xd3cff5ec,
+                 0xc404dc08,
+                 0xbccdb0da,
+                 0xa6337f19,
+                 0xe91f2603,
+                 0x0000024e,
+         }},
+        /* 10 ^ 256 */
+        {27,
+         {
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x982e7c01, 0xbed3875b, 0xd8d99f72, 0x12152f87, 0x6bde50c6, 0xcf4a6e70,
+                 0xd595d80f, 0x26b2716e, 0xadc666b0, 0x1d153624, 0x3c42d35a, 0x63ff540e, 0xcc5573c0,
+                 0x65f9ef17, 0x55bc28f2, 0x80dcc7f7, 0xf46eeddc, 0x5fdcefce, 0x000553f7,
+         }},
+        /* 10 ^ 512 */
+        {54,
+         {
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0xfc6cf801, 0x77f27267, 0x8f9546dc, 0x5d96976f, 0xb83a8a97,
+                 0xc31e1ad9, 0x46c40513, 0x94e65747, 0xc88976c1, 0x4475b579, 0x28f8733b, 0xaa1da1bf,
+                 0x703ed321, 0x1e25cfea, 0xb21a2f22, 0xbc51fb2e, 0x96e14f5d, 0xbfa3edac, 0x329c57ae,
+                 0xe7fc7153, 0xc3fc0695, 0x85a91924, 0xf95f635e, 0xb2908ee0, 0x93abade4, 0x1366732a,
+                 0x9449775c, 0x69be5b0e, 0x7343afac, 0xb099bc81, 0x45a71d46, 0xa2699748, 0x8cb07303,
+                 0x8a0b1f13, 0x8cab8a97, 0xc1d238d9, 0x633415d4, 0x0000001c,
+         }},
+        /* 10 ^ 1024 */
+        {107,
+         {
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2919f001, 0xf55b2b72, 0x6e7c215b,
+                 0x1ec29f86, 0x991c4e87, 0x15c51a88, 0x140ac535, 0x4c7d1e1a, 0xcc2cd819, 0x0ed1440e,
+                 0x896634ee, 0x7de16cfb, 0x1e43f61f, 0x9fce837d, 0x231d2b9c, 0x233e55c7, 0x65dc60d7,
+                 0xf451218b, 0x1c5cd134, 0xc9635986, 0x922bbb9f, 0xa7e89431, 0x9f9f2a07, 0x62be695a,
+                 0x8e1042c4, 0x045b7a74, 0x1abe1de3, 0x8ad822a5, 0xba34c411, 0xd814b505, 0xbf3fdeb3,
+                 0x8fc51a16, 0xb1b896bc, 0xf56deeec, 0x31fb6bfd, 0xb6f4654b, 0x101a3616, 0x6b7595fb,
+                 0xdc1a47fe, 0x80d98089, 0x80bda5a5, 0x9a202882, 0x31eb0f66, 0xfc8f1f90, 0x976a3310,
+                 0xe26a7b7e, 0xdf68368a, 0x3ce3a0b8, 0x8e4262ce, 0x75a351a2, 0x6cb0b6c9, 0x44597583,
+                 0x31b5653f, 0xc356e38a, 0x35faaba6, 0x0190fba0, 0x9fc4ed52, 0x88bc491b, 0x1640114a,
+                 0x005b8041, 0xf4f3235e, 0x1e8d4649, 0x36a8de06, 0x73c55349, 0xa7e6bd2a, 0xc1a6970c,
+                 0x47187094, 0xd2db49ef, 0x926c3f5b, 0xae6209d4, 0x2d433949, 0x34f4a3c6, 0xd4305d94,
+                 0xd9d61a05, 0x00000325,
+         }},
+        /* 10 ^ 2048 */
+        {213,
+         {
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x1333e001, 0xe3096865, 0xb27d4d3f, 0x49e28dcf, 0xec2e4721, 0xee87e354,
+                 0xb6067584, 0x368b8abb, 0xa5e5a191, 0x2ed56d55, 0xfd827773, 0xea50d142, 0x51b78db2,
+                 0x98342c9e, 0xc850dabc, 0x866ed6f1, 0x19342c12, 0x92794987, 0xd2f869c2, 0x66912e4a,
+                 0x71c7fd8f, 0x57a7842d, 0x235552eb, 0xfb7fedcc, 0xf3861ce0, 0x38209ce1, 0x9713b449,
+                 0x34c10134, 0x8c6c54de, 0xa7a8289c, 0x2dbb6643, 0xe3cb64f3, 0x8074ff01, 0xe3892ee9,
+                 0x10c17f94, 0xa8f16f92, 0xa8281ed6, 0x967abbb3, 0x5a151440, 0x9952fbed, 0x13b41e44,
+                 0xafe609c3, 0xa2bca416, 0xf111821f, 0xfb1264b4, 0x91bac974, 0xd6c7d6ab, 0x8e48ff35,
+                 0x4419bd43, 0xc4a65665, 0x685e5510, 0x33554c36, 0xab498697, 0x0dbd21fe, 0x3cfe491d,
+                 0x982da466, 0xcbea4ca7, 0x9e110c7b, 0x79c56b8a, 0x5fc5a047, 0x84d80e2e, 0x1aa9f444,
+                 0x730f203c, 0x6a57b1ab, 0xd752f7a6, 0x87a7dc62, 0x944545ff, 0x40660460, 0x77c1a42f,
+                 0xc9ac375d, 0xe866d7ef, 0x744695f0, 0x81428c85, 0xa1fc6b96, 0xd7917c7b, 0x7bf03c19,
+                 0x5b33eb41, 0x5715f791, 0x8f6cae5f, 0xdb0708fd, 0xb125ac8e, 0x785ce6b7, 0x56c6815b,
+                 0x6f46eadb, 0x4eeebeee, 0x195355d8, 0xa244de3c, 0x9d7389c0, 0x53761abd, 0xcf99d019,
+                 0xde9ec24b, 0x0d76ce39, 0x70beb181, 0x2e55ecee, 0xd5f86079, 0xf56d9d4b, 0xfb8886fb,
+                 0x13ef5a83, 0x408f43c5, 0x3f3389a4, 0xfad37943, 0x58ccf45c, 0xf82df846, 0x415c7f3e,
+                 0x2915e818, 0x8b3d5cf4, 0x6a445f27, 0xf8dbb57a, 0xca8f0070, 0x8ad803ec, 0xb2e87c34,
+                 0x038f9245, 0xbedd8a6c, 0xc7c9dee0, 0x0eac7d56, 0x2ad3fa14, 0xe0de0840, 0xf775677c,
+                 0xf1bd0ad5, 0x92be221e, 0x87fa1fb9, 0xce9d04a4, 0xd2c36fa9, 0x3f6f7024, 0xb028af62,
+                 0x907855ee, 0xd83e49d6, 0x4efac5dc, 0xe7151aab, 0x77cd8c6b, 0x0a753b7d, 0x0af908b4,
+                 0x8c983623, 0xe50f3027, 0x94222771, 0x1d08e2d6, 0xf7e928e6, 0xf2ee5ca6, 0x1b61b93c,
+                 0x11eb962b, 0x9648b21c, 0xce2bcba1, 0x34f77154, 0x7bbebe30, 0xe526a319, 0x8ce329ac,
+                 0xde4a74d2, 0xb5dc53d5, 0x0009e8b3,
+         }},
+        /* 10 ^ 4096 */
+        {426,
+         {
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                 0x00000000, 0x00000000, 0x2a67c001, 0xd4724e8d, 0x8efe7ae7, 0xf89a1e90, 0xef084117,
+                 0x54e05154, 0x13b1bb51, 0x506be829, 0xfb29b172, 0xe599574e, 0xf0da6146, 0x806c0ed3,
+                 0xb86ae5be, 0x45155e93, 0xc0591cc2, 0x7e1e7c34, 0x7c4823da, 0x1d1f4cce, 0x9b8ba1e8,
+                 0xd6bfdf75, 0xe341be10, 0xc2dfae78, 0x016b67b2, 0x0f237f1a, 0x3dbeabcd, 0xaf6a2574,
+                 0xcab3e6d7, 0x142e0e80, 0x61959127, 0x2c234811, 0x87009701, 0xcb4bf982, 0xf8169c84,
+                 0x88052f8c, 0x68dde6d4, 0xbc131761, 0xff0b0905, 0x54ab9c41, 0x7613b224, 0x1a1c304e,
+                 0x3bfe167b, 0x441c2d47, 0x4f6cea9c, 0x78f06181, 0xeb659fb8, 0x30c7ae41, 0x947e0d0e,
+                 0xa1ebcad7, 0xd97d9556, 0x2130504d, 0x1a8309cb, 0xf2acd507, 0x3f8ec72a, 0xfd82373a,
+                 0x95a842bc, 0x280f4d32, 0xf3618ac0, 0x811a4f04, 0x6dc3a5b4, 0xd3967a1b, 0x15b8c898,
+                 0xdcfe388f, 0x454eb2a0, 0x8738b909, 0x10c4e996, 0x2bd9cc11, 0x3297cd0c, 0x655fec30,
+                 0xae0725b1, 0xf4090ee8, 0x037d19ee, 0x398c6fed, 0x3b9af26b, 0xc994a450, 0xb5341743,
+                 0x75a697b2, 0xac50b9c1, 0x3ccb5b92, 0xffe06205, 0xa8329761, 0xdfea5242, 0xeb83cadb,
+                 0xe79dadf7, 0x3c20ee69, 0x1e0a6817, 0x7021b97a, 0x743074fa, 0x176ca776, 0x77fb8af6,
+                 0xeca19beb, 0x92baf1de, 0xaf63b712, 0xde35c88b, 0xa4eb8f8c, 0xe137d5e9, 0x40b464a0,
+                 0x87d1cde8, 0x42923bbd, 0xcd8f62ff, 0x2e2690f3, 0x095edc16, 0x59c89f1b, 0x1fa8fd5d,
+                 0x5138753d, 0x390a2b29, 0x80152f18, 0x2dd8d925, 0xf984d83e, 0x7a872e74, 0xc19e1faf,
+                 0xed4d542d, 0xecf9b5d0, 0x9462ea75, 0xc53c0adf, 0x0caea134, 0x37a2d439, 0xc8fa2e8a,
+                 0x2181327e, 0x6e7bb827, 0x2d240820, 0x50be10e0, 0x5893d4b8, 0xab312bb9, 0x1f2b2322,
+                 0x440b3f25, 0xbf627ede, 0x72dac789, 0xb608b895, 0x78787e2a, 0x86deb3f0, 0x6fee7aab,
+                 0xbb9373f4, 0x27ecf57b, 0xf7d8b57e, 0xfca26a9f, 0x3d04e8d2, 0xc9df13cb, 0x3172826a,
+                 0xcd9e8d7c, 0xa8fcd8e0, 0xb2c39497, 0x307641d9, 0x1cc939c1, 0x2608c4cf, 0xb6d1c7bf,
+                 0x3d326a7e, 0xeeaf19e6, 0x8e13e25f, 0xee63302b, 0x2dfe6d97, 0x25971d58, 0xe41d3cc4,
+                 0x0a80627c, 0xab8db59a, 0x9eea37c8, 0xe90afb77, 0x90ca19cf, 0x9ee3352c, 0x3613c850,
+                 0xfe78d682, 0x788f6e50, 0x5b060904, 0xb71bd1a4, 0x3fecb534, 0xb32c450c, 0x20c33857,
+                 0xa6e9cfda, 0x0239f4ce, 0x48497187, 0xa19adb95, 0xb492ed8a, 0x95aca6a8, 0x4dcd6cd9,
+                 0xcf1b2350, 0xfbe8b12a, 0x1a67778c, 0x38eb3acc, 0xc32da383, 0xfb126ab1, 0xa03f40a8,
+                 0xed5bf546, 0xe9ce4724, 0x4c4a74fd, 0x73a130d8, 0xd9960e2d, 0xa2ebd6c1, 0x94ab6feb,
+                 0x6f233b7c, 0x49126080, 0x8e7b9a73, 0x4b8c9091, 0xd298f999, 0x35e836b5, 0xa96ddeff,
+                 0x96119b31, 0x6b0dd9bc, 0xc6cc3f8d, 0x282566fb, 0x72b882e7, 0xd6769f3b, 0xa674343d,
+                 0x00fc509b, 0xdcbf7789, 0xd6266a3f, 0xae9641fd, 0x4e89541b, 0x11953407, 0x53400d03,
+                 0x8e0dd75a, 0xe5b53345, 0x108f19ad, 0x108b89bc, 0x41a4c954, 0xe03b2b63, 0x437b3d7f,
+                 0x97aced8e, 0xcbd66670, 0x2c5508c2, 0x650ebc69, 0x5c4f2ef0, 0x904ff6bf, 0x9985a2df,
+                 0x9faddd9e, 0x5ed8d239, 0x25585832, 0xe3e51cb9, 0x0ff4f1d4, 0x56c02d9a, 0x8c4ef804,
+                 0xc1a08a13, 0x13fd01c8, 0xe6d27671, 0xa7c234f4, 0x9d0176cc, 0xd0d73df2, 0x4d8bfa89,
+                 0x544f10cd, 0x2b17e0b2, 0xb70a5c7d, 0xfd86fe49, 0xdf373f41, 0x214495bb, 0x84e857fd,
+                 0x00d313d5, 0x0496fcbe, 0xa4ba4744, 0xe8cac982, 0xaec29e6e, 0x87ec7038, 0x7000a519,
+                 0xaeee333b, 0xff66e42c, 0x8afd6b25, 0x03b4f63b, 0xbd7991dc, 0x5ab8d9c7, 0x2ed4684e,
+                 0x48741a6c, 0xaf06940d, 0x2fdc6349, 0xb03d7ecd, 0xe974996f, 0xac7867f9, 0x52ec8721,
+                 0xbcdd9d4a, 0x8edd2d00, 0x3557de06, 0x41c759f8, 0x3956d4b9, 0xa75409f2, 0x123cd8a1,
+                 0xb6100fab, 0x3e7b21e2, 0x2e8d623b, 0x92959da2, 0xbca35f77, 0x200c03a5, 0x35fcb457,
+                 0x1bb6c6e4, 0xf74eb928, 0x3d5d0b54, 0x87cc1d21, 0x4964046f, 0x18ae4240, 0xd868b275,
+                 0x8bd2b496, 0x1c5563f4, 0xc234d8f5, 0xf868e970, 0xf9151fff, 0xae7be4a2, 0x271133ee,
+                 0xbb0fd922, 0x25254932, 0xa60a9fc0, 0x104bcd64, 0x30290145, 0x00000062,
+         }},
 };
 
 static int
@@ -294,14 +289,14 @@ BigInt_IsZero(const BigInt *i)
 static int
 BigInt_IsEven(const BigInt *i)
 {
-    return (i->length == 0) || ( (i->blocks[0] % 2) == 0);
+    return (i->length == 0) || ((i->blocks[0] % 2) == 0);
 }
 
 static void
 BigInt_Copy(BigInt *dst, const BigInt *src)
 {
     npy_uint32 length = src->length;
-    npy_uint32 * dstp = dst->blocks;
+    npy_uint32 *dstp = dst->blocks;
     const npy_uint32 *srcp;
     for (srcp = src->blocks; srcp != src->blocks + length; ++dstp, ++srcp) {
         *dstp = *srcp;
@@ -329,15 +324,13 @@ BigInt_ShiftLeft(BigInt *result, npy_uint32 shift)
         npy_uint32 i;
 
         /* copy blocks from high to low */
-        for (pInCur = result->blocks + result->length,
-                 pOutCur = pInCur + shiftBlocks;
-                 pInCur >= pInBlocks;
-                 --pInCur, --pOutCur) {
+        for (pInCur = result->blocks + result->length, pOutCur = pInCur + shiftBlocks;
+             pInCur >= pInBlocks; --pInCur, --pOutCur) {
             *pOutCur = *pInCur;
         }
 
         /* zero the remaining low blocks */
-        for (i  = 0; i < shiftBlocks; ++i) {
+        for (i = 0; i < shiftBlocks; ++i) {
             result->blocks[i] = 0;
         }
 
@@ -373,7 +366,7 @@ BigInt_ShiftLeft(BigInt *result, npy_uint32 shift)
         /* output the final blocks */
         DEBUG_ASSERT(outBlockIdx == shiftBlocks + 1);
         result->blocks[outBlockIdx] = highBits | lowBits;
-        result->blocks[outBlockIdx-1] = block << shiftBits;
+        result->blocks[outBlockIdx - 1] = block << shiftBits;
 
         /* zero the remaining low blocks */
         for (i = 0; i < shiftBlocks; ++i) {
@@ -460,7 +453,7 @@ BigInt_Multiply_int(BigInt *result, const BigInt *lhs, npy_uint32 rhs)
     npy_uint32 *resultCur = result->blocks;
     const npy_uint32 *pLhsCur = lhs->blocks;
     const npy_uint32 *pLhsEnd = lhs->blocks + lhs->length;
-    for ( ; pLhsCur != pLhsEnd; ++pLhsCur, ++resultCur) {
+    for (; pLhsCur != pLhsEnd; ++pLhsCur, ++resultCur) {
         npy_uint64 product = (npy_uint64)(*pLhsCur) * rhs + carry;
         *resultCur = (npy_uint32)(product & bitmask_u64(32));
         carry = product >> 32;
@@ -513,9 +506,8 @@ BigInt_Multiply(BigInt *result, const BigInt *lhs, const BigInt *rhs)
 
     /* perform standard long multiplication for each small block */
     resultStart = result->blocks;
-    for (smallCur = small->blocks;
-            smallCur != small->blocks + small->length;
-            ++smallCur, ++resultStart) {
+    for (smallCur = small->blocks; smallCur != small->blocks + small->length;
+         ++smallCur, ++resultStart) {
         /*
          * if non-zero, multiply against all the large blocks and add into the
          * result
@@ -526,13 +518,12 @@ BigInt_Multiply(BigInt *result, const BigInt *lhs, const BigInt *rhs)
             npy_uint32 *resultCur = resultStart;
             npy_uint64 carry = 0;
             do {
-                npy_uint64 product = (*resultCur) +
-                                     (*largeCur)*(npy_uint64)multiplier + carry;
+                npy_uint64 product = (*resultCur) + (*largeCur) * (npy_uint64)multiplier + carry;
                 carry = product >> 32;
                 *resultCur = product & bitmask_u64(32);
                 ++largeCur;
                 ++resultCur;
-            } while(largeCur != large->blocks + large->length);
+            } while (largeCur != large->blocks + large->length);
 
             DEBUG_ASSERT(resultCur < result->blocks + maxResultLen);
             *resultCur = (npy_uint32)(carry & bitmask_u64(32));
@@ -541,7 +532,7 @@ BigInt_Multiply(BigInt *result, const BigInt *lhs, const BigInt *rhs)
 
     /* check if the terminating block has no set bits */
     if (maxResultLen > 0 && result->blocks[maxResultLen - 1] == 0) {
-        result->length = maxResultLen-1;
+        result->length = maxResultLen - 1;
     }
     else {
         result->length = maxResultLen;
@@ -599,7 +590,7 @@ BigInt_MultiplyPow10(BigInt *in, npy_uint32 exponent, BigInt *temp)
     }
 
     /* output the result */
-    if (curTemp != in){
+    if (curTemp != in) {
         BigInt_Copy(in, curTemp);
     }
 }
@@ -676,14 +667,13 @@ BigInt_Add(BigInt *result, const BigInt *lhs, const BigInt *rhs)
     result->length = large->length;
 
     /* Add each block and add carry the overflow to the next block */
-    largeCur  = large->blocks;
-    largeEnd  = largeCur + large->length;
-    smallCur  = small->blocks;
-    smallEnd  = smallCur + small->length;
+    largeCur = large->blocks;
+    largeEnd = largeCur + large->length;
+    smallCur = small->blocks;
+    smallEnd = smallCur + small->length;
     resultCur = result->blocks;
     while (smallCur != smallEnd) {
-        npy_uint64 sum = carry + (npy_uint64)(*largeCur) +
-                                 (npy_uint64)(*smallCur);
+        npy_uint64 sum = carry + (npy_uint64)(*largeCur) + (npy_uint64)(*smallCur);
         carry = sum >> 32;
         *resultCur = sum & bitmask_u64(32);
         ++largeCur;
@@ -703,8 +693,8 @@ BigInt_Add(BigInt *result, const BigInt *lhs, const BigInt *rhs)
     /* If there's still a carry, append a new block */
     if (carry != 0) {
         DEBUG_ASSERT(carry == 1);
-        DEBUG_ASSERT((npy_uint32)(resultCur - result->blocks) ==
-               large->length && (large->length < c_BigInt_MaxBlocks));
+        DEBUG_ASSERT((npy_uint32)(resultCur - result->blocks) == large->length &&
+                     (large->length < c_BigInt_MaxBlocks));
         *resultCur = 1;
         result->length = large->length + 1;
     }
@@ -723,7 +713,7 @@ BigInt_Multiply2(BigInt *result, const BigInt *in)
     npy_uint32 *resultCur = result->blocks;
     const npy_uint32 *pLhsCur = in->blocks;
     const npy_uint32 *pLhsEnd = in->blocks + in->length;
-    for ( ; pLhsCur != pLhsEnd; ++pLhsCur, ++resultCur) {
+    for (; pLhsCur != pLhsEnd; ++pLhsCur, ++resultCur) {
         npy_uint32 cur = *pLhsCur;
         *resultCur = (cur << 1) | carry;
         carry = cur >> 31;
@@ -749,7 +739,7 @@ BigInt_Multiply2_inplace(BigInt *result)
 
     npy_uint32 *cur = result->blocks;
     npy_uint32 *end = result->blocks + result->length;
-    for ( ; cur != end; ++cur) {
+    for (; cur != end; ++cur) {
         npy_uint32 tmpcur = *cur;
         *cur = (tmpcur << 1) | carry;
         carry = tmpcur >> 31;
@@ -802,10 +792,9 @@ BigInt_DivideWithRemainder_MaxQuotient9(BigInt *dividend, const BigInt *divisor)
      * Check that the divisor has been correctly shifted into range and that it
      * is not smaller than the dividend in length.
      */
-    DEBUG_ASSERT(!divisor->length == 0 &&
-                divisor->blocks[divisor->length-1] >= 8 &&
-                divisor->blocks[divisor->length-1] < bitmask_u64(32) &&
-                dividend->length <= divisor->length);
+    DEBUG_ASSERT(!divisor->length == 0 && divisor->blocks[divisor->length - 1] >= 8 &&
+                 divisor->blocks[divisor->length - 1] < bitmask_u64(32) &&
+                 dividend->length <= divisor->length);
 
     /*
      * If the dividend is smaller than the divisor, the quotient is zero and the
@@ -840,15 +829,14 @@ BigInt_DivideWithRemainder_MaxQuotient9(BigInt *dividend, const BigInt *divisor)
             product = (npy_uint64)*divisorCur * (npy_uint64)quotient + carry;
             carry = product >> 32;
 
-            difference = (npy_uint64)*dividendCur
-                       - (product & bitmask_u64(32)) - borrow;
+            difference = (npy_uint64)*dividendCur - (product & bitmask_u64(32)) - borrow;
             borrow = (difference >> 32) & 1;
 
             *dividendCur = difference & bitmask_u64(32);
 
             ++divisorCur;
             ++dividendCur;
-        } while(divisorCur <= finalDivisorBlock);
+        } while (divisorCur <= finalDivisorBlock);
 
         /* remove all leading zero blocks from dividend */
         while (length > 0 && dividend->blocks[length - 1] == 0) {
@@ -872,15 +860,14 @@ BigInt_DivideWithRemainder_MaxQuotient9(BigInt *dividend, const BigInt *divisor)
         ++quotient;
 
         do {
-            npy_uint64 difference = (npy_uint64)*dividendCur
-                                  - (npy_uint64)*divisorCur - borrow;
+            npy_uint64 difference = (npy_uint64)*dividendCur - (npy_uint64)*divisorCur - borrow;
             borrow = (difference >> 32) & 1;
 
             *dividendCur = difference & bitmask_u64(32);
 
             ++divisorCur;
             ++dividendCur;
-        } while(divisorCur <= finalDivisorBlock);
+        } while (divisorCur <= finalDivisorBlock);
 
         /* remove all leading zero blocks from dividend */
         while (length > 0 && dividend->blocks[length - 1] == 0) {
@@ -896,25 +883,16 @@ BigInt_DivideWithRemainder_MaxQuotient9(BigInt *dividend, const BigInt *divisor)
 static npy_uint32
 LogBase2_32(npy_uint32 val)
 {
-    static const npy_uint8 logTable[256] =
-    {
-        0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
-        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
-    };
+    static const npy_uint8 logTable[256] = {
+            0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+            4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+            5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+            6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+            6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7};
 
     npy_uint32 temp;
 
@@ -960,10 +938,10 @@ LogBase2_128(npy_uint64 hi, npy_uint64 lo)
 }
 
 static npy_uint32
-PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa,
-            npy_uint32 mantissaHexWidth, char signbit)
+PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, npy_uint32 mantissaHexWidth,
+            char signbit)
 {
-    npy_uint32 maxPrintLen = bufferSize-1;
+    npy_uint32 maxPrintLen = bufferSize - 1;
     npy_uint32 pos = 0;
 
     DEBUG_ASSERT(bufferSize > 0);
@@ -974,12 +952,12 @@ PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa,
 
         /* only print sign for inf values (though nan can have a sign set) */
         if (signbit == '+') {
-            if (pos < maxPrintLen-1) {
+            if (pos < maxPrintLen - 1) {
                 buffer[pos++] = '+';
             }
         }
         else if (signbit == '-') {
-            if (pos < maxPrintLen-1) {
+            if (pos < maxPrintLen - 1) {
                 buffer[pos++] = '-';
             }
         }
@@ -1012,11 +990,10 @@ PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa,
 }
 
 static npy_uint32
-Dragon4(BigInt *bigints, const npy_int32 exponent,
-        const npy_uint32 mantissaBit, const npy_bool hasUnequalMargins,
-        const DigitMode digitMode, const CutoffMode cutoffMode,
-        npy_int32 cutoff_max, npy_int32 cutoff_min, char *pOutBuffer,
-        npy_uint32 bufferSize, npy_int32 *pOutExponent)
+Dragon4(BigInt *bigints, const npy_int32 exponent, const npy_uint32 mantissaBit,
+        const npy_bool hasUnequalMargins, const DigitMode digitMode, const CutoffMode cutoffMode,
+        npy_int32 cutoff_max, npy_int32 cutoff_min, char *pOutBuffer, npy_uint32 bufferSize,
+        npy_int32 *pOutExponent)
 {
     char *curDigit = pOutBuffer;
 
@@ -1031,7 +1008,7 @@ Dragon4(BigInt *bigints, const npy_int32 exponent,
      * scaledMarginHigh will point to scaledMarginLow in the case they must be
      * equal to each other, otherwise it will point to optionalMarginHigh.
      */
-    BigInt *mantissa = &bigints[0];  /* the only initialized bigint */
+    BigInt *mantissa = &bigints[0]; /* the only initialized bigint */
     BigInt *scale = &bigints[1];
     BigInt *scaledValue = &bigints[2];
     BigInt *scaledMarginLow = &bigints[3];
@@ -1044,7 +1021,7 @@ Dragon4(BigInt *bigints, const npy_int32 exponent,
     const npy_float64 log10_2 = 0.30102999566398119521373889472449;
     npy_int32 digitExponent, hiBlock;
     npy_int32 cutoff_max_Exponent, cutoff_min_Exponent;
-    npy_uint32 outputDigit;    /* current digit being output */
+    npy_uint32 outputDigit; /* current digit being output */
     npy_uint32 outputLen;
     npy_bool isEven = BigInt_IsEven(mantissa);
     npy_int32 cmp;
@@ -1078,7 +1055,7 @@ Dragon4(BigInt *bigints, const npy_int32 exponent,
             /* scaledValue      = 2 * 2 * mantissa*2^exponent */
             BigInt_ShiftLeft(scaledValue, exponent + 2);
             /* scale            = 2 * 2 * 1 */
-            BigInt_Set_uint32(scale,  4);
+            BigInt_Set_uint32(scale, 4);
             /* scaledMarginLow  = 2 * 2^(exponent-1) */
             BigInt_Pow2(scaledMarginLow, exponent);
             /* scaledMarginHigh = 2 * 2 * 2^(exponent-1) */
@@ -1157,8 +1134,8 @@ Dragon4(BigInt *bigints, const npy_int32 exponent,
      *  Warning: This calculation assumes npy_float64 is an IEEE-binary64
      *  float. This line may need to be updated if this is not the case.
      */
-    digitExponent = (npy_int32)(
-       ceil((npy_float64)((npy_int32)mantissaBit + exponent) * log10_2 - 0.69));
+    digitExponent =
+            (npy_int32)(ceil((npy_float64)((npy_int32)mantissaBit + exponent) * log10_2 - 0.69));
 
     /*
      * if the digit exponent is smaller than the smallest desired digit for
@@ -1169,11 +1146,10 @@ Dragon4(BigInt *bigints, const npy_int32 exponent,
      * accurate value or it will clamp it above the accurate value.
      */
     if (cutoff_max >= 0 && cutoffMode == CutoffMode_FractionLength &&
-            digitExponent <= -cutoff_max) {
+        digitExponent <= -cutoff_max) {
         digitExponent = -cutoff_max + 1;
     }
 
-
     /* Divide value by 10^digitExponent. */
     if (digitExponent > 0) {
         /* A positive exponent creates a division so we multiply the scale. */
@@ -1184,7 +1160,7 @@ Dragon4(BigInt *bigints, const npy_int32 exponent,
          * A negative exponent creates a multiplication so we multiply up the
          * scaledValue, scaledMarginLow and scaledMarginHigh.
          */
-        BigInt *temp=temp1, *pow10=temp2;
+        BigInt *temp = temp1, *pow10 = temp2;
         BigInt_Pow10(pow10, -digitExponent, temp);
 
         BigInt_Multiply(temp, scaledValue, pow10);
@@ -1264,7 +1240,7 @@ Dragon4(BigInt *bigints, const npy_int32 exponent,
     }
 
     /* Output the exponent of the first digit we will print */
-    *pOutExponent = digitExponent-1;
+    *pOutExponent = digitExponent - 1;
 
     /*
      * In preparation for calling BigInt_DivideWithRemainder_MaxQuotient9(), we
@@ -1312,11 +1288,10 @@ Dragon4(BigInt *bigints, const npy_int32 exponent,
         for (;;) {
             BigInt *scaledValueHigh = temp1;
 
-            digitExponent = digitExponent-1;
+            digitExponent = digitExponent - 1;
 
             /* divide out the scale to extract the digit */
-            outputDigit =
-                BigInt_DivideWithRemainder_MaxQuotient9(scaledValue, scale);
+            outputDigit = BigInt_DivideWithRemainder_MaxQuotient9(scaledValue, scale);
             DEBUG_ASSERT(outputDigit < 10);
 
             /* update the high end of the value */
@@ -1332,7 +1307,7 @@ Dragon4(BigInt *bigints, const npy_int32 exponent,
             cmp = BigInt_Compare(scaledValueHigh, scale);
             high = isEven ? (cmp >= 0) : (cmp > 0);
             if (((low | high) & (digitExponent <= cutoff_min_Exponent)) |
-                    (digitExponent == cutoff_max_Exponent)) {
+                (digitExponent == cutoff_max_Exponent)) {
                 break;
             }
 
@@ -1358,15 +1333,13 @@ Dragon4(BigInt *bigints, const npy_int32 exponent,
         high = NPY_FALSE;
 
         for (;;) {
-            digitExponent = digitExponent-1;
+            digitExponent = digitExponent - 1;
 
             /* divide out the scale to extract the digit */
-            outputDigit =
-                BigInt_DivideWithRemainder_MaxQuotient9(scaledValue, scale);
+            outputDigit = BigInt_DivideWithRemainder_MaxQuotient9(scaledValue, scale);
             DEBUG_ASSERT(outputDigit < 10);
 
-            if ((scaledValue->length == 0) |
-                    (digitExponent == cutoff_max_Exponent)) {
+            if ((scaledValue->length == 0) | (digitExponent == cutoff_max_Exponent)) {
                 break;
             }
 
@@ -1448,15 +1421,14 @@ Dragon4(BigInt *bigints, const npy_int32 exponent,
 }
 
 static npy_uint32
-FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
-                 npy_int32 exponent, char signbit, npy_uint32 mantissaBit,
-                 npy_bool hasUnequalMargins, DigitMode digit_mode,
-                 CutoffMode cutoff_mode, npy_int32 precision,
-                 npy_int32 min_digits, TrimMode trim_mode,
-                 npy_int32 digits_left, npy_int32 digits_right)
+FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, npy_int32 exponent,
+                 char signbit, npy_uint32 mantissaBit, npy_bool hasUnequalMargins,
+                 DigitMode digit_mode, CutoffMode cutoff_mode, npy_int32 precision,
+                 npy_int32 min_digits, TrimMode trim_mode, npy_int32 digits_left,
+                 npy_int32 digits_right)
 {
     npy_int32 printExponent;
-    npy_int32 numDigits, numWholeDigits=0, has_sign=0;
+    npy_int32 numDigits, numWholeDigits = 0, has_sign = 0;
     npy_int32 add_digits;
 
     npy_int32 maxPrintLen = (npy_int32)bufferSize - 1, pos = 0;
@@ -1479,9 +1451,8 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
         has_sign = 1;
     }
 
-    numDigits = Dragon4(mantissa, exponent, mantissaBit, hasUnequalMargins,
-                        digit_mode, cutoff_mode, precision, min_digits,
-                        buffer + has_sign, maxPrintLen - has_sign,
+    numDigits = Dragon4(mantissa, exponent, mantissaBit, hasUnequalMargins, digit_mode, cutoff_mode,
+                        precision, min_digits, buffer + has_sign, maxPrintLen - has_sign,
                         &printExponent);
 
     DEBUG_ASSERT(numDigits > 0);
@@ -1490,7 +1461,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
     /* if output has a whole number */
     if (printExponent >= 0) {
         /* leave the whole number at the start of the buffer */
-        numWholeDigits = printExponent+1;
+        numWholeDigits = printExponent + 1;
         if (numDigits <= numWholeDigits) {
             npy_int32 count = numWholeDigits - numDigits;
             pos += numDigits;
@@ -1502,7 +1473,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
 
             /* add trailing zeros up to the decimal point */
             numDigits += count;
-            for ( ; count > 0; count--) {
+            for (; count > 0; count--) {
                 buffer[pos++] = '0';
             }
         }
@@ -1516,8 +1487,8 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
                 numFractionDigits = maxFractionDigits;
             }
 
-            memmove(buffer + pos + numWholeDigits + 1,
-                    buffer + pos + numWholeDigits, numFractionDigits);
+            memmove(buffer + pos + numWholeDigits + 1, buffer + pos + numWholeDigits,
+                    numFractionDigits);
             pos += numWholeDigits;
             buffer[pos] = '.';
             numDigits = numWholeDigits + 1 + numFractionDigits;
@@ -1548,8 +1519,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
                 numFractionDigits = maxFractionDigits;
             }
 
-            memmove(buffer + pos + digitsStartIdx, buffer + pos,
-                    numFractionDigits);
+            memmove(buffer + pos + digitsStartIdx, buffer + pos, numFractionDigits);
 
             /* insert the leading zeros */
             for (i = 2; i < digitsStartIdx; ++i) {
@@ -1563,7 +1533,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
 
         /* add the decimal point */
         if (pos + 1 < maxPrintLen) {
-            buffer[pos+1] = '.';
+            buffer[pos + 1] = '.';
         }
 
         /* add the initial zero */
@@ -1576,8 +1546,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
     }
 
     /* always add decimal point, except for DprZeros mode */
-    if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 &&
-            pos < maxPrintLen) {
+    if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 && pos < maxPrintLen) {
         buffer[pos++] = '.';
     }
 
@@ -1594,8 +1563,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
             numFractionDigits++;
         }
     }
-    else if (trim_mode == TrimMode_None &&
-             desiredFractionalDigits > numFractionDigits &&
+    else if (trim_mode == TrimMode_None && desiredFractionalDigits > numFractionDigits &&
              pos < maxPrintLen) {
         /* add trailing zeros up to add_digits length */
         /* compute the number of trailing zeros needed */
@@ -1605,7 +1573,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
         }
         numFractionDigits += count;
 
-        for ( ; count > 0; count--) {
+        for (; count > 0; count--) {
             buffer[pos++] = '0';
         }
     }
@@ -1616,19 +1584,19 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
      * depending on trim settings.
      */
     if (trim_mode != TrimMode_None && numFractionDigits > 0) {
-        while (buffer[pos-1] == '0') {
+        while (buffer[pos - 1] == '0') {
             pos--;
             numFractionDigits--;
         }
-        if (buffer[pos-1] == '.') {
+        if (buffer[pos - 1] == '.') {
             /* in TrimMode_LeaveOneZero, add trailing 0 back */
-            if (trim_mode == TrimMode_LeaveOneZero){
+            if (trim_mode == TrimMode_LeaveOneZero) {
                 buffer[pos++] = '0';
                 numFractionDigits++;
             }
             /* in TrimMode_DptZeros, remove trailing decimal point */
             else if (trim_mode == TrimMode_DptZeros) {
-                    pos--;
+                pos--;
             }
         }
     }
@@ -1638,8 +1606,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
         npy_int32 count = digits_right - numFractionDigits;
 
         /* in trim_mode DptZeros, if right padding, add a space for the . */
-        if (trim_mode == TrimMode_DptZeros && numFractionDigits == 0
-                && pos < maxPrintLen) {
+        if (trim_mode == TrimMode_DptZeros && numFractionDigits == 0 && pos < maxPrintLen) {
             buffer[pos++] = ' ';
         }
 
@@ -1647,7 +1614,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
             count = maxPrintLen - pos;
         }
 
-        for ( ; count > 0; count--) {
+        for (; count > 0; count--) {
             buffer[pos++] = ' ';
         }
     }
@@ -1664,7 +1631,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
             memmove(buffer + shift, buffer, count);
         }
         pos = shift + count;
-        for ( ; shift > 0; shift--) {
+        for (; shift > 0; shift--) {
             buffer[shift - 1] = ' ';
         }
     }
@@ -1677,11 +1644,10 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
 }
 
 static npy_uint32
-FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
-                  npy_int32 exponent, char signbit, npy_uint32 mantissaBit,
-                  npy_bool hasUnequalMargins, DigitMode digit_mode,
-                  npy_int32 precision, npy_int32 min_digits, TrimMode trim_mode,
-                  npy_int32 digits_left, npy_int32 exp_digits)
+FormatScientific(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, npy_int32 exponent,
+                 char signbit, npy_uint32 mantissaBit, npy_bool hasUnequalMargins,
+                 DigitMode digit_mode, npy_int32 precision, npy_int32 min_digits,
+                 TrimMode trim_mode, npy_int32 digits_left, npy_int32 exp_digits)
 {
     npy_int32 printExponent;
     npy_int32 numDigits;
@@ -1714,17 +1680,15 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
         pCurOut++;
         --bufferSize;
     }
-    else if (signbit == '-'  && bufferSize > 1) {
+    else if (signbit == '-' && bufferSize > 1) {
         *pCurOut = '-';
         pCurOut++;
         --bufferSize;
     }
 
-    numDigits = Dragon4(mantissa, exponent, mantissaBit, hasUnequalMargins,
-                        digit_mode, CutoffMode_TotalLength,
-                        precision < 0 ? -1 : precision + 1,
-                        min_digits < 0 ? -1 : min_digits + 1,
-                        pCurOut, bufferSize, &printExponent);
+    numDigits = Dragon4(mantissa, exponent, mantissaBit, hasUnequalMargins, digit_mode,
+                        CutoffMode_TotalLength, precision < 0 ? -1 : precision + 1,
+                        min_digits < 0 ? -1 : min_digits + 1, pCurOut, bufferSize, &printExponent);
 
     DEBUG_ASSERT(numDigits > 0);
     DEBUG_ASSERT(numDigits <= bufferSize);
@@ -1736,12 +1700,12 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
     }
 
     /* insert the decimal point prior to the fractional number */
-    numFractionDigits = numDigits-1;
+    numFractionDigits = numDigits - 1;
     if (numFractionDigits > 0 && bufferSize > 1) {
         npy_int32 maxFractionDigits = (npy_int32)bufferSize - 2;
 
         if (numFractionDigits > maxFractionDigits) {
-            numFractionDigits =  maxFractionDigits;
+            numFractionDigits = maxFractionDigits;
         }
 
         memmove(pCurOut + 1, pCurOut, numFractionDigits);
@@ -1751,8 +1715,7 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
     }
 
     /* always add decimal point, except for DprZeros mode */
-    if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 &&
-            bufferSize > 1) {
+    if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 && bufferSize > 1) {
         *pCurOut = '.';
         ++pCurOut;
         --bufferSize;
@@ -1838,12 +1801,12 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
             printExponent /= 10;
         }
         /* count back over leading zeros */
-        for (i = 5; i > exp_digits && digits[i-1] == 0; i--) {
+        for (i = 5; i > exp_digits && digits[i - 1] == 0; i--) {
         }
         exp_size = i;
         /* write remaining digits to tmp buf */
         for (i = exp_size; i > 0; i--) {
-            exponentBuffer[2 + (exp_size-i)] = (char)('0' + digits[i-1]);
+            exponentBuffer[2 + (exp_size - i)] = (char)('0' + digits[i - 1]);
         }
 
         /* copy the exponent buffer into the output */
@@ -1856,7 +1819,6 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
         bufferSize -= count;
     }
 
-
     DEBUG_ASSERT(bufferSize > 0);
     pCurOut[0] = '\0';
 
@@ -1864,28 +1826,24 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
 }
 
 static npy_uint32
-Format_floatbits(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
-                 npy_int32 exponent, char signbit, npy_uint32 mantissaBit,
-                 npy_bool hasUnequalMargins, Dragon4_Options *opt)
+Format_floatbits(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, npy_int32 exponent,
+                 char signbit, npy_uint32 mantissaBit, npy_bool hasUnequalMargins,
+                 Dragon4_Options *opt)
 {
     /* format the value */
     if (opt->scientific) {
-        return FormatScientific(buffer, bufferSize, mantissa, exponent,
-                                signbit, mantissaBit, hasUnequalMargins,
-                                opt->digit_mode, opt->precision,
-                                opt->min_digits, opt->trim_mode,
-                                opt->digits_left, opt->exp_digits);
+        return FormatScientific(buffer, bufferSize, mantissa, exponent, signbit, mantissaBit,
+                                hasUnequalMargins, opt->digit_mode, opt->precision, opt->min_digits,
+                                opt->trim_mode, opt->digits_left, opt->exp_digits);
     }
     else {
-        return FormatPositional(buffer, bufferSize, mantissa, exponent,
-                                signbit, mantissaBit, hasUnequalMargins,
-                                opt->digit_mode, opt->cutoff_mode,
-                                opt->precision, opt->min_digits, opt->trim_mode,
-                                opt->digits_left, opt->digits_right);
+        return FormatPositional(buffer, bufferSize, mantissa, exponent, signbit, mantissaBit,
+                                hasUnequalMargins, opt->digit_mode, opt->cutoff_mode,
+                                opt->precision, opt->min_digits, opt->trim_mode, opt->digits_left,
+                                opt->digits_right);
     }
 }
 
-
 static npy_uint32
 Dragon4_PrintFloat_Sleef_quad(Sleef_quad *value, Dragon4_Options *opt)
 {
@@ -1920,9 +1878,9 @@ Dragon4_PrintFloat_Sleef_quad(Sleef_quad *value, Dragon4_Options *opt)
     if (floatSign != 0) {
         signbit = '-';
     }
-    else if (opt->sign) {
-        signbit = '+';
-    }
+    // else if (opt->sign) {
+    //     signbit = '+';
+    // }
 
     /* if this is a special value */
     if (floatExponent == bitmask_u32(15)) {
@@ -1934,26 +1892,24 @@ Dragon4_PrintFloat_Sleef_quad(Sleef_quad *value, Dragon4_Options *opt)
     /* factor the value into its parts */
     if (floatExponent != 0) {
         /* normal */
-        mantissa_hi         = (1ull << 48) | mantissa_hi;
+        mantissa_hi = (1ull << 48) | mantissa_hi;
         /* mantissa_lo is unchanged */
-        exponent            = floatExponent - 16383 - 112;
-        mantissaBit         = 112;
-        hasUnequalMargins   = (floatExponent != 1) && (mantissa_hi == 0 &&
-                                                       mantissa_lo == 0);
+        exponent = floatExponent - 16383 - 112;
+        mantissaBit = 112;
+        hasUnequalMargins = (floatExponent != 1) && (mantissa_hi == 0 && mantissa_lo == 0);
     }
     else {
         /* subnormal */
-        exponent            = 1 - 16383 - 112;
-        mantissaBit         = LogBase2_128(mantissa_hi, mantissa_lo);
-        hasUnequalMargins   = NPY_FALSE;
+        exponent = 1 - 16383 - 112;
+        mantissaBit = LogBase2_128(mantissa_hi, mantissa_lo);
+        hasUnequalMargins = NPY_FALSE;
     }
 
     BigInt_Set_2x_uint64(&bigints[0], mantissa_hi, mantissa_lo);
-    return Format_floatbits(buffer, bufferSize, bigints, exponent,
-                            signbit, mantissaBit, hasUnequalMargins, opt);
+    return Format_floatbits(buffer, bufferSize, bigints, exponent, signbit, mantissaBit,
+                            hasUnequalMargins, opt);
 }
 
-
 PyObject *
 Dragon4_Positional_QuadDType_opt(Sleef_quad *val, Dragon4_Options *opt)
 {
@@ -1966,12 +1922,12 @@ Dragon4_Positional_QuadDType_opt(Sleef_quad *val, Dragon4_Options *opt)
 }
 
 PyObject *
-Dragon4_Positional_QuadDType(Sleef_quad *val, DigitMode digit_mode,
-                   CutoffMode cutoff_mode, int precision, int min_digits,
-                   int sign, TrimMode trim, int pad_left, int pad_right)
+Dragon4_Positional_QuadDType(Sleef_quad *val, DigitMode digit_mode, CutoffMode cutoff_mode,
+                             int precision, int min_digits, int sign, TrimMode trim, int pad_left,
+                             int pad_right)
 {
     Dragon4_Options opt;
-    
+
     opt.scientific = 0;
     opt.digit_mode = digit_mode;
     opt.cutoff_mode = cutoff_mode;
@@ -1998,9 +1954,8 @@ Dragon4_Scientific_QuadDType_opt(Sleef_quad *val, Dragon4_Options *opt)
 }
 
 PyObject *
-Dragon4_Scientific_QuadDType(Sleef_quad *val, DigitMode digit_mode, int precision,
-                   int min_digits, int sign, TrimMode trim, int pad_left, 
-                   int exp_digits)
+Dragon4_Scientific_QuadDType(Sleef_quad *val, DigitMode digit_mode, int precision, int min_digits,
+                             int sign, TrimMode trim, int pad_left, int exp_digits)
 {
     Dragon4_Options opt;
 
@@ -2018,40 +1973,61 @@ Dragon4_Scientific_QuadDType(Sleef_quad *val, DigitMode digit_mode, int precisio
     return Dragon4_Scientific_QuadDType_opt(val, &opt);
 }
 
-
+/*
+ * Format the QuadPrecision scalar `obj` in positional notation by delegating
+ * to Dragon4_Positional_QuadDType; the remaining arguments are forwarded
+ * unchanged. Returns NULL with TypeError set if `obj` is not such a scalar.
+ */
 PyObject *
-Dragon4_Positional(PyObject *obj, DigitMode digit_mode, CutoffMode cutoff_mode,
-                   int precision, int min_digits, int sign, TrimMode trim,
-                   int pad_left, int pad_right)
+Dragon4_Positional(PyObject *obj, DigitMode digit_mode, CutoffMode cutoff_mode, int precision,
+                   int min_digits, int sign, TrimMode trim, int pad_left, int pad_right)
 {
-    npy_double v;
-
     if (PyArray_IsScalar(obj, QuadPrecDType)) {
         QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)obj;
         if (quad_obj->backend == BACKEND_SLEEF) {
-            return Dragon4_Positional_QuadDType(&quad_obj->value.sleef_value, digit_mode, cutoff_mode, precision, min_digits, sign, trim, pad_left, pad_right);
-        } else {
+            return Dragon4_Positional_QuadDType(&quad_obj->value.sleef_value, digit_mode,
+                                                cutoff_mode, precision, min_digits, sign, trim,
+                                                pad_left, pad_right);
+        }
+        else {
+            /* NOTE(review): Sleef_cast_from_doubleq1 presumably takes a double, so the
+             * long double value may be narrowed before formatting -- confirm. */
             Sleef_quad sleef_val = Sleef_cast_from_doubleq1(quad_obj->value.longdouble_value);
-            return Dragon4_Positional_QuadDType(&sleef_val, digit_mode, cutoff_mode, precision, min_digits, sign, trim, pad_left, pad_right);
+            return Dragon4_Positional_QuadDType(&sleef_val, digit_mode, cutoff_mode, precision,
+                                                min_digits, sign, trim, pad_left, pad_right);
         }
     }
+
+    /* Returning NULL without an exception set would surface as a SystemError. */
+    PyErr_SetString(PyExc_TypeError, "Dragon4_Positional expects a QuadPrecision scalar");
+    return NULL;
 }
 
+/*
+ * Format the QuadPrecision scalar `obj` in scientific notation by delegating
+ * to Dragon4_Scientific_QuadDType; the remaining arguments are forwarded
+ * unchanged. Returns NULL with TypeError set if `obj` is not such a scalar.
+ */
 PyObject *
-Dragon4_Scientific(PyObject *obj, DigitMode digit_mode, int precision,
-                   int min_digits, int sign, TrimMode trim, int pad_left,
-                   int exp_digits)
+Dragon4_Scientific(PyObject *obj, DigitMode digit_mode, int precision, int min_digits, int sign,
+                   TrimMode trim, int pad_left, int exp_digits)
 {
-    npy_double val;
-
     if (PyArray_IsScalar(obj, QuadPrecDType)) {
         QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)obj;
         if (quad_obj->backend == BACKEND_SLEEF) {
-            return Dragon4_Scientific_QuadDType(&quad_obj->value.sleef_value, digit_mode, precision, min_digits, sign, trim, pad_left, exp_digits);
-        } else {
+            return Dragon4_Scientific_QuadDType(&quad_obj->value.sleef_value, digit_mode, precision,
+                                                min_digits, sign, trim, pad_left, exp_digits);
+        }
+        else {
+            /* NOTE(review): Sleef_cast_from_doubleq1 presumably takes a double, so the
+             * long double value may be narrowed before formatting -- confirm. */
             Sleef_quad sleef_val = Sleef_cast_from_doubleq1(quad_obj->value.longdouble_value);
-            return Dragon4_Scientific_QuadDType(&sleef_val, digit_mode, precision, min_digits, sign, trim, pad_left, exp_digits);
+            return Dragon4_Scientific_QuadDType(&sleef_val, digit_mode, precision, min_digits, sign,
+                                                trim, pad_left, exp_digits);
         }
     }
 
+    /* Returning NULL without an exception set would surface as a SystemError. */
+    PyErr_SetString(PyExc_TypeError, "Dragon4_Scientific expects a QuadPrecision scalar");
+    return NULL;
 }
\ No newline at end of file
diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c
index 4524f3db..bdc6a649 100644
--- a/quaddtype/numpy_quaddtype/src/dtype.c
+++ b/quaddtype/numpy_quaddtype/src/dtype.c
@@ -50,10 +50,8 @@ quad_store(char *data_ptr, void *x, QuadBackendType backend)
 QuadPrecDTypeObject *
 new_quaddtype_instance(QuadBackendType backend)
 {
-    if (backend != BACKEND_SLEEF && backend != BACKEND_LONGDOUBLE)
-    {
-        PyErr_SetString(PyExc_TypeError,
-                        "Backend must be sleef or longdouble");
+    if (backend != BACKEND_SLEEF && backend != BACKEND_LONGDOUBLE) {
+        PyErr_SetString(PyExc_TypeError, "Backend must be sleef or longdouble");
         return NULL;
     }
 
@@ -71,7 +69,6 @@ new_quaddtype_instance(QuadBackendType backend)
 static QuadPrecDTypeObject *
 ensure_canonical(QuadPrecDTypeObject *self)
 {
-
     Py_INCREF(self);
     return self;
 }
@@ -79,7 +76,6 @@ ensure_canonical(QuadPrecDTypeObject *self)
 static QuadPrecDTypeObject *
 common_instance(QuadPrecDTypeObject *dtype1, QuadPrecDTypeObject *dtype2)
 {
-
     if (dtype1->backend != dtype2->backend) {
         PyErr_SetString(PyExc_TypeError,
                         "Cannot find common instance for QuadPrecDTypes with different backends");
@@ -92,7 +88,6 @@ common_instance(QuadPrecDTypeObject *dtype1, QuadPrecDTypeObject *dtype2)
 static PyArray_DTypeMeta *
 common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other)
 {
-
     // Promote integer and floating-point types to QuadPrecDType
     if (other->type_num >= 0 &&
         (PyTypeNum_ISINTEGER(other->type_num) || PyTypeNum_ISFLOAT(other->type_num))) {
@@ -116,7 +111,7 @@ quadprec_discover_descriptor_from_pyobject(PyArray_DTypeMeta *NPY_UNUSED(cls), P
         PyErr_SetString(PyExc_TypeError, "Can only store QuadPrecision in a QuadPrecDType array.");
         return NULL;
     }
-    
+
     QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)obj;
 
     return (PyArray_Descr *)new_quaddtype_instance(quad_obj->backend);
@@ -169,7 +164,7 @@ quadprec_getitem(QuadPrecDTypeObject *descr, char *dataptr)
 static PyArray_Descr *
 quadprec_default_descr(PyArray_DTypeMeta *cls)
 {
-    QuadPrecDTypeObject * temp = new_quaddtype_instance(BACKEND_SLEEF);
+    QuadPrecDTypeObject *temp = new_quaddtype_instance(BACKEND_SLEEF);
     return (PyArray_Descr *)temp;
 }
 
@@ -181,6 +176,7 @@ static PyType_Slot QuadPrecDType_Slots[] = {
         {NPY_DT_setitem, &quadprec_setitem},
         {NPY_DT_getitem, &quadprec_getitem},
         {NPY_DT_default_descr, &quadprec_default_descr},
+        {NPY_DT_PyArray_ArrFuncs_dotfunc, NULL},
         {0, NULL}};
 
 static PyObject *
@@ -220,8 +216,6 @@ QuadPrecDType_str(QuadPrecDTypeObject *self)
     return PyUnicode_FromFormat("QuadPrecDType(backend='%s')", backend_str);
 }
 
-
-
 PyArray_DTypeMeta QuadPrecDType = {
         {{
                 PyVarObject_HEAD_INIT(NULL, 0).tp_name = "numpy_quaddtype.QuadPrecDType",

From 0ac54649088cfb08dbd461287132f839aac91b1d Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Mon, 16 Sep 2024 15:16:12 +0530
Subject: [PATCH 14/32] added more unary funcs (trig) and arctan2

---
 quaddtype/numpy_quaddtype/src/dtype.c   |  8 +-
 quaddtype/numpy_quaddtype/src/ops.hpp   | 99 +++++++++++++++++++++++++
 quaddtype/numpy_quaddtype/src/scalar.c  |  2 +-
 quaddtype/numpy_quaddtype/src/umath.cpp | 22 +++++-
 4 files changed, 126 insertions(+), 5 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c
index bdc6a649..839c6ab4 100644
--- a/quaddtype/numpy_quaddtype/src/dtype.c
+++ b/quaddtype/numpy_quaddtype/src/dtype.c
@@ -50,9 +50,11 @@ quad_store(char *data_ptr, void *x, QuadBackendType backend)
 QuadPrecDTypeObject *
 new_quaddtype_instance(QuadBackendType backend)
 {
+    QuadBackendType target_backend = backend;
     if (backend != BACKEND_SLEEF && backend != BACKEND_LONGDOUBLE) {
         PyErr_SetString(PyExc_TypeError, "Backend must be sleef or longdouble");
         return NULL;
+        // target_backend = BACKEND_SLEEF;
     }
 
     QuadPrecDTypeObject *new = (QuadPrecDTypeObject *)PyArrayDescr_Type.tp_new(
@@ -60,9 +62,9 @@ new_quaddtype_instance(QuadBackendType backend)
     if (new == NULL) {
         return NULL;
     }
-    new->base.elsize = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
-    new->base.alignment = (backend == BACKEND_SLEEF) ? _Alignof(Sleef_quad) : _Alignof(long double);
-    new->backend = backend;
+    new->base.elsize = (target_backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
+    new->base.alignment = (target_backend == BACKEND_SLEEF) ? _Alignof(Sleef_quad) : _Alignof(long double);
+    new->backend = target_backend;
     return new;
 }
 
diff --git a/quaddtype/numpy_quaddtype/src/ops.hpp b/quaddtype/numpy_quaddtype/src/ops.hpp
index 9b3eeed1..83c7ce3c 100644
--- a/quaddtype/numpy_quaddtype/src/ops.hpp
+++ b/quaddtype/numpy_quaddtype/src/ops.hpp
@@ -110,6 +110,49 @@ quad_exp2(Sleef_quad *op, Sleef_quad *out)
     return 0;
 }
 
+static inline int
+quad_sin(Sleef_quad *op, Sleef_quad *out)
+{
+    *out = Sleef_sinq1_u10(*op);
+    return 0;
+}
+
+static inline int
+quad_cos(Sleef_quad *op, Sleef_quad *out)
+{
+    *out = Sleef_cosq1_u10(*op);
+    return 0;
+}
+
+static inline int
+quad_tan(Sleef_quad *op, Sleef_quad *out)
+{
+    *out = Sleef_tanq1_u10(*op);
+    return 0;
+}
+
+static inline int
+quad_asin(Sleef_quad *op, Sleef_quad *out)
+{
+    *out = Sleef_asinq1_u10(*op);
+    return 0;
+}
+
+static inline int
+quad_acos(Sleef_quad *op, Sleef_quad *out)
+{
+    *out = Sleef_acosq1_u10(*op);
+    return 0;
+}
+
+static inline int
+quad_atan(Sleef_quad *op, Sleef_quad *out)
+{
+    *out = Sleef_atanq1_u10(*op);
+    return 0;
+}
+
+
 // Unary long double operations
 typedef int (*unary_op_longdouble_def)(long double *, long double *);
 
@@ -218,6 +261,48 @@ ld_exp2(long double *op, long double *out)
     return 0;
 }
 
+static inline int
+ld_sin(long double *op, long double *out)
+{
+    *out = sinl(*op);
+    return 0;
+}
+
+static inline int
+ld_cos(long double *op, long double *out)
+{
+    *out = cosl(*op);
+    return 0;
+}
+
+static inline int
+ld_tan(long double *op, long double *out)
+{
+    *out = tanl(*op);
+    return 0;
+}
+
+static inline int
+ld_asin(long double *op, long double *out)
+{
+    *out = asinl(*op);
+    return 0;
+}
+
+static inline int
+ld_acos(long double *op, long double *out)
+{
+    *out = acosl(*op);
+    return 0;
+}
+
+static inline int
+ld_atan(long double *op, long double *out)
+{
+    *out = atanl(*op);
+    return 0;
+}
+
 // Binary Quad operations
 typedef int (*binary_op_quad_def)(Sleef_quad *, Sleef_quad *, Sleef_quad *);
 
@@ -277,6 +362,13 @@ quad_maximum(Sleef_quad *out, Sleef_quad *in1, Sleef_quad *in2)
     return 0;
 }
 
+static inline int
+quad_atan2(Sleef_quad *out, Sleef_quad *in1, Sleef_quad *in2)
+{
+    *out = Sleef_atan2q1_u10(*in1, *in2);
+    return 0;
+}
+
 // Binary long double operations
 typedef int (*binary_op_longdouble_def)(long double *, long double *, long double *);
 
@@ -336,6 +428,13 @@ ld_maximum(long double *out, long double *in1, long double *in2)
     return 0;
 }
 
+static inline int
+ld_atan2(long double *out, long double *in1, long double *in2)
+{
+    *out = atan2l(*in1, *in2);
+    return 0;
+}
+
 // comparison quad functions
 typedef npy_bool (*cmp_quad_def)(const Sleef_quad *, const Sleef_quad *);
 
diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c
index f1cbc6d4..1a3ffa3a 100644
--- a/quaddtype/numpy_quaddtype/src/scalar.c
+++ b/quaddtype/numpy_quaddtype/src/scalar.c
@@ -232,6 +232,6 @@ PyTypeObject QuadPrecision_Type = {
 int
 init_quadprecision_scalar(void)
 {
-    QuadPrecision_Type.tp_base = &PyFloat_Type; // this is not working (subclassing to np.floating)
+    // QuadPrecision_Type.tp_base = &PyFloatingArrType_Type;
     return PyType_Ready(&QuadPrecision_Type);
 }
\ No newline at end of file
diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp
index de2d982d..5e9e7619 100644
--- a/quaddtype/numpy_quaddtype/src/umath.cpp
+++ b/quaddtype/numpy_quaddtype/src/umath.cpp
@@ -217,6 +217,24 @@ init_quad_unary_ops(PyObject *numpy)
     if (create_quad_unary_ufunc<quad_exp2, ld_exp2>(numpy, "exp2") < 0) {
         return -1;
     }
+    if (create_quad_unary_ufunc<quad_sin, ld_sin>(numpy, "sin") < 0) {
+        return -1;
+    }
+    if (create_quad_unary_ufunc<quad_cos, ld_cos>(numpy, "cos") < 0) {
+        return -1;
+    }
+    if (create_quad_unary_ufunc<quad_tan, ld_tan>(numpy, "tan") < 0) {
+        return -1;
+    }
+    if (create_quad_unary_ufunc<quad_asin, ld_asin>(numpy, "arcsin") < 0) {
+        return -1;
+    }
+    if (create_quad_unary_ufunc<quad_acos, ld_acos>(numpy, "arccos") < 0) {
+        return -1;
+    }
+    if (create_quad_unary_ufunc<quad_atan, ld_atan>(numpy, "arctan") < 0) {
+        return -1;
+    }
     return 0;
 }
 
@@ -315,7 +333,6 @@ static int
 quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
                     PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[])
 {
-
     int nin = ufunc->nin;
     int nargs = ufunc->nargs;
     PyArray_DTypeMeta *common = NULL;
@@ -473,6 +490,9 @@ init_quad_binary_ops(PyObject *numpy)
     if (create_quad_binary_ufunc<quad_maximum, ld_maximum>(numpy, "maximum") < 0) {
         return -1;
     }
+    if (create_quad_binary_ufunc<quad_atan2, ld_atan2>(numpy, "arctan2") < 0) {
+        return -1;
+    }
     return 0;
 }
 

From 25dd6b3a5d2cdeb51bbc32650b240daa13a8ea76 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Mon, 16 Sep 2024 18:02:42 +0530
Subject: [PATCH 15/32] removing testing files

---
 temp.py | 283 --------------------------------------------------------
 1 file changed, 283 deletions(-)
 delete mode 100644 temp.py

diff --git a/temp.py b/temp.py
deleted file mode 100644
index 3ef85e95..00000000
--- a/temp.py
+++ /dev/null
@@ -1,283 +0,0 @@
-import numpy_quaddtype as npq
-import numpy as np
-
-
-def test_scalar_ops(backend):
-    print(f"\nTesting scalar operations for {backend} backend:")
-
-    # Create QuadPrecision instances
-    q1 = npq.QuadPrecision(
-        "3.14159265358979323846264338327950288", backend=backend)
-    q2 = npq.QuadPrecision(
-        "-2.71828182845904523536028747135266250", backend=backend)
-
-    # Test unary operations
-    print("\nUnary operations:")
-    print(f"  Negation of q1: {-q1}")
-    print(f"  Absolute value of q2: {abs(q2)}")
-
-    # Test binary operations
-    print("\nBinary operations:")
-    print(f"  Addition: {q1 + q2}")
-    print(f"  Subtraction: {q1 - q2}")
-    print(f"  Multiplication: {q1 * q2}")
-    print(f"  Division: {q1 / q2}")
-
-    # Test comparison operations
-    print("\nComparison operations:")
-    print(f"  q1 == q2: {q1 == q2}")
-    print(f"  q1 != q2: {q1 != q2}")
-    print(f"  q1 < q2: {q1 < q2}")
-    print(f"  q1 <= q2: {q1 <= q2}")
-    print(f"  q1 > q2: {q1 > q2}")
-    print(f"  q1 >= q2: {q1 >= q2}")
-
-    # Test operations with Python numbers
-    print("\nOperations with Python numbers:")
-    print(f"  q1 + 1: {q1 + 1}")
-    print(f"  q1 - 2.5: {q1 - 2.5}")
-    print(f"  q1 * 3: {q1 * 3}")
-    print(f"  q1 / 2: {q1 / 2}")
-
-    # Test boolean conversion
-    print("\nBoolean conversion:")
-    print(f"  bool(q1): {np.bool(q1)}")
-    print(
-        f"  bool(npq.QuadPrecision('0', backend=backend)): {np.bool(npq.QuadPrecision('0', backend=backend))}")
-
-
-def test_casting(backend):
-    print(f"\nTesting {backend} backend:")
-
-    # Create QuadPrecision instances
-    q1 = npq.QuadPrecision(
-        "3.14159265358979323846264338327950288", backend=backend)
-    q2 = npq.QuadPrecision(
-        "-2.71828182845904523536028747135266250", backend=backend)
-
-    # Test casting from QuadPrecision to numpy dtypes
-    print("Casting from QuadPrecision to numpy dtypes:")
-    print(f"  float32: {np.float32(q1)}")
-    print(f"  float64: {np.float64(q1)}")
-    print(f"  int64: {np.int64(q1)}")
-    print(f"  uint64: {np.uint64(q1)}")
-
-    # Test casting from numpy dtypes to QuadPrecision
-    print("\nCasting from numpy dtypes to QuadPrecision:")
-    print(
-        f"  float32: {np.float32(3.14159).astype(npq.QuadPrecDType(backend=backend))}")
-    print(
-        f"  float64: {np.float64(2.71828182845904).astype(npq.QuadPrecDType(backend=backend))}")
-    print(
-        f"  int64: {np.int64(-1234567890).astype(npq.QuadPrecDType(backend=backend))}")
-    print(
-        f"  uint64: {np.uint64(9876543210).astype(npq.QuadPrecDType(backend=backend))}")
-
-    # Test array operations
-    print("\nArray operations:")
-    q_array = np.array([q1, q2], dtype=npq.QuadPrecDType(backend=backend))
-    print(f"  QuadPrecision array: {q_array}")
-
-    np_array = np.array([3.14, -2.71, 1.41, -1.73], dtype=np.float64)
-    q_from_np = np_array.astype(npq.QuadPrecDType(backend=backend))
-    print(f"  Numpy to QuadPrecision: {q_from_np}")
-
-    back_to_np = np.array(q_from_np, dtype=np.float64)
-    print(f"  QuadPrecision to Numpy: {back_to_np}")
-
-    # Test precision maintenance
-    large_int = 12345678901234567890
-    q_large = np.array([large_int], dtype=np.uint64).astype(
-        npq.QuadPrecDType(backend=backend))[0]
-    print(f"\nPrecision test:")
-    print(f"  Original large int: {large_int}")
-    print(f"  QuadPrecision: {q_large}")
-    print(f"  Back to int: {np.uint64(q_large)}")
-
-    # Test edge cases
-
-
-def test_edge_cases(backend):
-    print(f"\nTesting negative numbers for {backend} backend:")
-
-    # Test various negative numbers
-    test_values = [
-        -1.0,
-        -1e10,
-        -1e100,
-        -1e300,
-        np.nextafter(np.finfo(np.float64).min, 0),
-        np.finfo(np.float64).min
-    ]
-
-    for value in test_values:
-        q_value = npq.QuadPrecision(str(value), backend=backend)
-        print(f"  Original: {value}")
-        print(f"  QuadPrecision: {q_value}")
-        print(f"  Back to float64: {np.float64(q_value)}")
-        print()
-
-    # Test value beyond float64 precision
-    beyond_float64_precision = "1.7976931348623157081452742373170435e+308"
-    q_beyond = npq.QuadPrecision(beyond_float64_precision, backend=backend)
-    print(f"  Beyond float64 precision: {q_beyond}")
-    q_float64_max = npq.QuadPrecision(
-        str(np.finfo(np.float64).max), backend=backend)
-    diff = q_beyond - q_float64_max
-    print(f"  Difference from float64 max: {diff}")
-    print(
-        f"  Difference is positive: {diff > npq.QuadPrecision('0', backend=backend)}")
-
-    # Test epsilon (smallest representable difference between two numbers)
-    q_epsilon = npq.QuadPrecision(
-        str(np.finfo(np.float64).eps), backend=backend)
-    print(f"  Float64 epsilon in QuadPrecision: {q_epsilon}")
-    q_one = npq.QuadPrecision("1", backend=backend)
-    q_one_plus_epsilon = q_one + q_epsilon
-    print(f"  1 + epsilon != 1: {q_one_plus_epsilon != q_one}")
-    print(f"  (1 + epsilon) - 1: {q_one_plus_epsilon - q_one}")
-
-
-def test_ufuncs(backend):
-    print(f"\nTesting ufuncs for {backend} backend:")
-
-    # Create QuadPrecision arrays
-    q_array1 = np.array([1, 2, 3], dtype=npq.QuadPrecDType(backend=backend))
-    q_array2 = np.array([1, 2, 3], dtype=npq.QuadPrecDType(backend=backend))
-
-    # Test unary ufuncs
-    print("\nUnary unfuncs:")
-    print(f"  negative: {np.negative(q_array1)}")
-    print(f"  absolute: {np.absolute(q_array1)}")
-    print(f"  rint: {np.rint(q_array1)}")
-    print(f"  floor: {np.floor(q_array1)}")
-    print(f"  ceil: {np.ceil(q_array1)}")
-    print(f"  trunc: {np.trunc(q_array1)}")
-    print(f"  sqrt: {np.sqrt(q_array1)}")
-    print(f"  square: {np.square(q_array1)}")
-    print(f"  log: {np.log(q_array1)}")
-    print(f"  log2: {np.log2(q_array1)}")
-    print(f"  log10: {np.log10(q_array1)}")
-    print(f"  exp: {np.exp(q_array1)}")
-    print(f"  exp2: {np.exp2(q_array1)}")
-
-    # Test binary ufuncs
-    print("\nBinary ufuncs:")
-    print(f"  add: {np.add(q_array1, q_array2)}")
-    print(f"  subtract: {np.subtract(q_array1, q_array2)}")
-    print(f"  multiply: {np.multiply(q_array1, q_array2)}")
-    print(f"  divide: {np.divide(q_array1, q_array2)}")
-    print(f"  power: {np.power(q_array1, q_array2)}")
-    print(f"  mod: {np.mod(q_array1, q_array2)}")
-    print(f"  minimum: {np.minimum(q_array1, q_array2)}")
-    print(f"  maximum: {np.maximum(q_array1, q_array2)}")
-
-    # Test comparison ufuncs
-    print("\nComparison ufuncs:")
-    print(f"  equal: {np.equal(q_array1, q_array2)}")
-    print(f"  not_equal: {np.not_equal(q_array1, q_array2)}")
-    print(f"  less: {np.less(q_array1, q_array2)}")
-    print(f"  less_equal: {np.less_equal(q_array1, q_array2)}")
-    print(f"  greater: {np.greater(q_array1, q_array2)}")
-    print(f"  greater_equal: {np.greater_equal(q_array1, q_array2)}")
-
-    # Test mixed operations with numpy arrays
-    print(f"Testing backend: {backend}")
-    print("\nMixed operations with numpy arrays:")
-    np_array = np.array([1.0, 2.0, 3.0], dtype=np.float64)
-    print(f"  add: {np.add(q_array1, np_array)}")
-    print(f"  multiply: {np.multiply(q_array1, np_array)}")
-    print(f"  divide: {np.divide(q_array1, np_array)}")
-
-    # Test reduction operations
-    print("\nReduction operations:")
-    print(f"  sum: {np.sum(q_array1)}")
-    print(f"  prod: {np.prod(q_array1)}")
-    print(f"  min: {np.min(q_array1)}")
-    print(f"  max: {np.max(q_array1)}")
-
-from numpy_quaddtype import QuadPrecision, QuadPrecDType
-
-def test_quad_precision():
-    print("Testing QuadPrecision scalar:")
-    
-    # Test different initializations
-    values = [
-        0,
-        1,
-        -1,
-        3.14159265358979323846,
-        1e100,
-        1e-100,
-        float('inf'),
-        float('-inf'),
-        float('nan')
-    ]
-
-    for val in values:
-        q = QuadPrecision(val)
-        print(f"Value: {val}")
-        print(f"  str: {str(q)}")
-        print(f"  repr: {repr(q)}")
-
-    # Test different backends
-    print("\nTesting backends:")
-    q_sleef = QuadPrecision(3.14159265358979323846, backend='sleef')
-    q_longdouble = QuadPrecision(3.14159265358979323846, backend='longdouble')
-    print(f"Sleef:      {q_sleef}")
-    print(f"Long double: {q_longdouble}")
-
-def test_quad_dtype():
-    print("\nTesting QuadPrecDType:")
-
-    # Create an array with QuadPrecDType
-    arr = np.array([0, 1, -1, 3.14159265358979323846, 1e100, 1e-100], 
-                   dtype=QuadPrecDType())
-    
-    print("Array elements:")
-    for elem in arr:
-        print(f"  {elem}")
-
-    print("\nFull array:")
-    print(arr)
-
-    # Test different backends
-    print("\nTesting backends in arrays:")
-    arr_sleef = np.array([3.14159265358979323846], dtype=QuadPrecDType(backend='sleef'))
-    arr_longdouble = np.array([3.14159265358979323846], dtype=QuadPrecDType(backend='longdouble'))
-    print(f"Sleef array:       {arr_sleef}")
-    print(f"Long double array: {arr_longdouble}")
-
-def test_operations():
-    print("\nTesting basic operations:")
-    a = QuadPrecision(3.14159265358979323846)
-    b = QuadPrecision(2.71828182845904523536)
-    
-    print(f"a = {a}")
-    print(f"b = {b}")
-    print(f"a + b = {a + b}")
-    print(f"a - b = {a - b}")
-    print(f"a * b = {a * b}")
-    print(f"a / b = {a / b}")
-
-def test():
-    # Run tests for both backends
-    for backend in ['sleef', 'longdouble']:
-        test_scalar_ops(backend)
-        test_casting(backend)
-        test_edge_cases(backend)
-        test_ufuncs(backend)
-        test_quad_precision()
-        test_quad_dtype()
-        test_operations()
-        print("*"*50)
-
-    print("All tests completed successfully")
-
-def dot(a, b):
-    r = np.dot(a, b)
-    return r
-
-if __name__ == "__main__":
-    a = np.array([1, 2, 3], dtype=QuadPrecDType())
-    print(dot(a, a))
\ No newline at end of file

From b719ba46e0bcee70459402d11a483f1ce9b4991e Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Mon, 16 Sep 2024 18:35:54 +0530
Subject: [PATCH 16/32] fixing import statements and comparison ufunc promoter

---
 quaddtype/numpy_quaddtype/src/umath.cpp | 4 ++--
 quaddtype/tests/test_quaddtype.py       | 8 +-------
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp
index 5e9e7619..91794dc6 100644
--- a/quaddtype/numpy_quaddtype/src/umath.cpp
+++ b/quaddtype/numpy_quaddtype/src/umath.cpp
@@ -333,6 +333,7 @@ static int
 quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
                     PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[])
 {
+    printf("calling promoter for ufunc %s\n", ufunc->name);
     int nin = ufunc->nin;
     int nargs = ufunc->nargs;
     PyArray_DTypeMeta *common = NULL;
@@ -560,7 +561,6 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name)
     PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &PyArray_BoolDType};
 
     PyType_Slot slots[] = {
-            {NPY_METH_resolve_descriptors, (void *)&quad_binary_op_resolve_descriptors},
             {NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop<sleef_comp, ld_comp>},
             {0, NULL}};
 
@@ -579,7 +579,7 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name)
     }
 
     PyObject *promoter_capsule =
-            PyCapsule_New((void *)&quad_ufunc_promoter, "numpy._ufunc_promoter", NULL);
+            PyCapsule_New((void *)&comparison_ufunc_promoter, "numpy._ufunc_promoter", NULL);
     if (promoter_capsule == NULL) {
         return -1;
     }
diff --git a/quaddtype/tests/test_quaddtype.py b/quaddtype/tests/test_quaddtype.py
index 20ccd91c..11cf7671 100644
--- a/quaddtype/tests/test_quaddtype.py
+++ b/quaddtype/tests/test_quaddtype.py
@@ -3,7 +3,7 @@
 import numpy as np
 import operator
 
-from quaddtype import QuadPrecDType, QuadPrecision
+from numpy_quaddtype import QuadPrecDType, QuadPrecision
 
 
 def test_create_scalar_simple():
@@ -17,12 +17,6 @@ def test_basic_equality():
         "12.0") == QuadPrecision("12.00")
 
 
-@pytest.mark.parametrize("val", ["123532.543", "12893283.5"])
-def test_scalar_repr(val):
-    expected = f"QuadPrecision('{str(QuadPrecision(val))}')"
-    assert repr(QuadPrecision(val)) == expected
-
-
 @pytest.mark.parametrize("op", ["add", "sub", "mul", "truediv", "pow"])
 @pytest.mark.parametrize("other", ["3.0", "12.5", "100.0"])
 def test_binary_ops(op, other):

From fe6cabc031789dad74019f06c17b1d08febdbe64 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Mon, 16 Sep 2024 18:40:47 +0530
Subject: [PATCH 17/32] removed unnecessary prints

---
 quaddtype/numpy_quaddtype/src/umath.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp
index 91794dc6..8d965fd4 100644
--- a/quaddtype/numpy_quaddtype/src/umath.cpp
+++ b/quaddtype/numpy_quaddtype/src/umath.cpp
@@ -333,7 +333,6 @@ static int
 quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
                     PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[])
 {
-    printf("calling promoter for ufunc %s\n", ufunc->name);
     int nin = ufunc->nin;
     int nargs = ufunc->nargs;
     PyArray_DTypeMeta *common = NULL;

From 0fba040ebd5d62c1afad9d8856b637363298d354 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Tue, 17 Sep 2024 18:01:14 +0530
Subject: [PATCH 18/32] added separate aligned and unaligned ufunc support

---
 quaddtype/numpy_quaddtype/src/umath.cpp | 155 +++++++++++++++++++-----
 1 file changed, 126 insertions(+), 29 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp
index 8d965fd4..7b908134 100644
--- a/quaddtype/numpy_quaddtype/src/umath.cpp
+++ b/quaddtype/numpy_quaddtype/src/umath.cpp
@@ -104,9 +104,9 @@ quad_unary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtype
 
 template <unary_op_quad_def sleef_op, unary_op_longdouble_def longdouble_op>
 int
-quad_generic_unary_op_strided_loop(PyArrayMethod_Context *context, char *const data[],
-                                   npy_intp const dimensions[], npy_intp const strides[],
-                                   NpyAuxData *auxdata)
+quad_generic_unary_op_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
+                                             npy_intp const dimensions[], npy_intp const strides[],
+                                             NpyAuxData *auxdata)
 {
     npy_intp N = dimensions[0];
     char *in_ptr = data[0];
@@ -135,6 +135,34 @@ quad_generic_unary_op_strided_loop(PyArrayMethod_Context *context, char *const d
     return 0;
 }
 
+template <unary_op_quad_def sleef_op, unary_op_longdouble_def longdouble_op>
+int
+quad_generic_unary_op_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
+                                           npy_intp const dimensions[], npy_intp const strides[],
+                                           NpyAuxData *auxdata)
+{
+    npy_intp N = dimensions[0];
+    char *in_ptr = data[0];
+    char *out_ptr = data[1];
+    npy_intp in_stride = strides[0];
+    npy_intp out_stride = strides[1];
+
+    QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];
+    QuadBackendType backend = descr->backend;
+
+    while (N--) {
+        if (backend == BACKEND_SLEEF) {
+            sleef_op((Sleef_quad *)in_ptr, (Sleef_quad *)out_ptr);
+        }
+        else {
+            longdouble_op((long double *)in_ptr, (long double *)out_ptr);
+        }
+        in_ptr += in_stride;
+        out_ptr += out_stride;
+    }
+    return 0;
+}
+
 template <unary_op_quad_def sleef_op, unary_op_longdouble_def longdouble_op>
 int
 create_quad_unary_ufunc(PyObject *numpy, const char *ufunc_name)
@@ -149,7 +177,9 @@ create_quad_unary_ufunc(PyObject *numpy, const char *ufunc_name)
     PyType_Slot slots[] = {
             {NPY_METH_resolve_descriptors, (void *)&quad_unary_op_resolve_descriptors},
             {NPY_METH_strided_loop,
-             (void *)&quad_generic_unary_op_strided_loop<sleef_op, longdouble_op>},
+             (void *)&quad_generic_unary_op_strided_loop_aligned<sleef_op, longdouble_op>},
+            {NPY_METH_unaligned_strided_loop,
+             (void *)&quad_generic_unary_op_strided_loop_unaligned<sleef_op, longdouble_op>},
             {0, NULL}};
 
     PyArrayMethod_Spec Spec = {
@@ -157,7 +187,7 @@ create_quad_unary_ufunc(PyObject *numpy, const char *ufunc_name)
             .nin = 1,
             .nout = 1,
             .casting = NPY_NO_CASTING,
-            .flags = (NPY_ARRAYMETHOD_FLAGS)0,
+            .flags = NPY_METH_SUPPORTS_UNALIGNED,
             .dtypes = dtypes,
             .slots = slots,
     };
@@ -245,7 +275,6 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
                                    PyArray_Descr *const given_descrs[],
                                    PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED(view_offset))
 {
-
     QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0];
     QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1];
     QuadBackendType target_backend;
@@ -255,7 +284,8 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
     if (descr_in1->backend != descr_in2->backend) {
         target_backend = BACKEND_LONGDOUBLE;
         casting = NPY_SAFE_CASTING;
-    } else {
+    }
+    else {
         target_backend = descr_in1->backend;
     }
 
@@ -266,7 +296,8 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
             if (!loop_descrs[i]) {
                 return (NPY_CASTING)-1;
             }
-        } else {
+        }
+        else {
             Py_INCREF(given_descrs[i]);
             loop_descrs[i] = given_descrs[i];
         }
@@ -278,14 +309,16 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
         if (!loop_descrs[2]) {
             return (NPY_CASTING)-1;
         }
-    } else {
+    }
+    else {
         QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)given_descrs[2];
         if (descr_out->backend != target_backend) {
             loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(target_backend);
             if (!loop_descrs[2]) {
                 return (NPY_CASTING)-1;
             }
-        } else {
+        }
+        else {
             Py_INCREF(given_descrs[2]);
             loop_descrs[2] = given_descrs[2];
         }
@@ -295,9 +328,9 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
 
 template <binary_op_quad_def sleef_op, binary_op_longdouble_def longdouble_op>
 int
-quad_generic_binop_strided_loop(PyArrayMethod_Context *context, char *const data[],
-                                npy_intp const dimensions[], npy_intp const strides[],
-                                NpyAuxData *auxdata)
+quad_generic_binop_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
+                                          npy_intp const dimensions[], npy_intp const strides[],
+                                          NpyAuxData *auxdata)
 {
     npy_intp N = dimensions[0];
     char *in1_ptr = data[0], *in2_ptr = data[1];
@@ -329,6 +362,37 @@ quad_generic_binop_strided_loop(PyArrayMethod_Context *context, char *const data
     return 0;
 }
 
+template <binary_op_quad_def sleef_op, binary_op_longdouble_def longdouble_op>
+int
+quad_generic_binop_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
+                                        npy_intp const dimensions[], npy_intp const strides[],
+                                        NpyAuxData *auxdata)
+{
+    npy_intp N = dimensions[0];
+    char *in1_ptr = data[0], *in2_ptr = data[1];
+    char *out_ptr = data[2];
+    npy_intp in1_stride = strides[0];
+    npy_intp in2_stride = strides[1];
+    npy_intp out_stride = strides[2];
+
+    QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];
+    QuadBackendType backend = descr->backend;
+
+    while (N--) {
+        if (backend == BACKEND_SLEEF) {
+            sleef_op((Sleef_quad *)out_ptr, (Sleef_quad *)in1_ptr, (Sleef_quad *)in2_ptr);
+        }
+        else {
+            longdouble_op((long double *)out_ptr, (long double *)in1_ptr, (long double *)in2_ptr);
+        }
+
+        in1_ptr += in1_stride;
+        in2_ptr += in2_stride;
+        out_ptr += out_stride;
+    }
+    return 0;
+}
+
 static int
 quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
                     PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[])
@@ -344,23 +408,19 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
         for (int i = 0; i < 3; i++) {
             Py_INCREF(op_dtypes[1]);
             new_op_dtypes[i] = op_dtypes[1];
-
         }
         return 0;
     }
 
     // Check if any input or signature is QuadPrecision
     for (int i = 0; i < nin; i++) {
-
         if (op_dtypes[i] == &QuadPrecDType) {
             has_quad = true;
-
         }
     }
 
     if (has_quad) {
         common = &QuadPrecDType;
-
     }
     else {
         for (int i = nin; i < nargs; i++) {
@@ -368,7 +428,6 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
                 if (common == NULL) {
                     Py_INCREF(signature[i]);
                     common = signature[i];
-
                 }
                 else if (common != signature[i]) {
                     Py_CLEAR(common);  // Not homogeneous, unset common
@@ -388,7 +447,6 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
 
             return -1;
         }
-
     }
 
     // Set all new_op_dtypes to the common dtype
@@ -424,7 +482,9 @@ create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name)
     PyType_Slot slots[] = {
             {NPY_METH_resolve_descriptors, (void *)&quad_binary_op_resolve_descriptors},
             {NPY_METH_strided_loop,
-             (void *)&quad_generic_binop_strided_loop<sleef_op, longdouble_op>},
+             (void *)&quad_generic_binop_strided_loop_aligned<sleef_op, longdouble_op>},
+            {NPY_METH_unaligned_strided_loop,
+             (void *)&quad_generic_binop_strided_loop_unaligned<sleef_op, longdouble_op>},
             {0, NULL}};
 
     PyArrayMethod_Spec Spec = {
@@ -432,7 +492,7 @@ create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name)
             .nin = 2,
             .nout = 1,
             .casting = NPY_NO_CASTING,
-            .flags = NPY_METH_IS_REORDERABLE,
+            .flags = (NPY_ARRAYMETHOD_FLAGS)(NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_IS_REORDERABLE),
             .dtypes = dtypes,
             .slots = slots,
     };
@@ -500,9 +560,9 @@ init_quad_binary_ops(PyObject *numpy)
 
 template <cmp_quad_def sleef_comp, cmp_londouble_def ld_comp>
 int
-quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[],
-                               npy_intp const dimensions[], npy_intp const strides[],
-                               NpyAuxData *auxdata)
+quad_generic_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
+                                       npy_intp const dimensions[], npy_intp const strides[],
+                                       NpyAuxData *auxdata)
 {
     npy_intp N = dimensions[0];
     char *in1_ptr = data[0], *in2_ptr = data[1];
@@ -513,7 +573,6 @@ quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[
 
     QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];
     QuadBackendType backend = descr->backend;
-    size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
 
     while (N--) {
         if (backend == BACKEND_SLEEF) {
@@ -532,6 +591,42 @@ quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[
     return 0;
 }
 
+template <cmp_quad_def sleef_comp, cmp_londouble_def ld_comp>
+int
+quad_generic_comp_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
+                                         npy_intp const dimensions[], npy_intp const strides[],
+                                         NpyAuxData *auxdata)
+{
+    npy_intp N = dimensions[0];
+    char *in1_ptr = data[0], *in2_ptr = data[1];
+    char *out_ptr = data[2];
+    npy_intp in1_stride = strides[0];
+    npy_intp in2_stride = strides[1];
+    npy_intp out_stride = strides[2];
+
+    QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];
+    QuadBackendType backend = descr->backend;
+    size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
+
+    quad_value in1, in2;
+    while (N--) {
+        memcpy(&in1, in1_ptr, elem_size);
+        memcpy(&in2, in2_ptr, elem_size);
+
+        if (backend == BACKEND_SLEEF) {
+            *((npy_bool *)out_ptr) = sleef_comp(&in1.sleef_value, &in2.sleef_value);
+        }
+        else {
+            *((npy_bool *)out_ptr) = ld_comp(&in1.longdouble_value, &in2.longdouble_value);
+        }
+
+        in1_ptr += in1_stride;
+        in2_ptr += in2_stride;
+        out_ptr += out_stride;
+    }
+    return 0;
+}
+
 NPY_NO_EXPORT int
 comparison_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
                           PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[])
@@ -559,16 +654,18 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name)
 
     PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &PyArray_BoolDType};
 
-    PyType_Slot slots[] = {
-            {NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop<sleef_comp, ld_comp>},
-            {0, NULL}};
+    PyType_Slot slots[] = {{NPY_METH_strided_loop,
+                            (void *)&quad_generic_comp_strided_loop_aligned<sleef_comp, ld_comp>},
+                           {NPY_METH_unaligned_strided_loop,
+                            (void *)&quad_generic_comp_strided_loop_unaligned<sleef_comp, ld_comp>},
+                           {0, NULL}};
 
     PyArrayMethod_Spec Spec = {
             .name = "quad_comp",
             .nin = 2,
             .nout = 1,
             .casting = NPY_NO_CASTING,
-            .flags = (NPY_ARRAYMETHOD_FLAGS)0,
+            .flags = NPY_METH_SUPPORTS_UNALIGNED,
             .dtypes = dtypes,
             .slots = slots,
     };

From 4f0a6044619a80ab8e5ff0b140d7b7dc91cdf273 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Tue, 17 Sep 2024 21:51:17 +0530
Subject: [PATCH 19/32] WIP

---
 quaddtype/numpy_quaddtype/src/casts.cpp |  4 +-
 quaddtype/numpy_quaddtype/src/dtype.c   |  3 +-
 quaddtype/numpy_quaddtype/src/ops.hpp   |  1 -
 quaddtype/numpy_quaddtype/src/scalar.c  | 55 +++++++++++++-----------
 quaddtype/numpy_quaddtype/src/umath.cpp | 57 ++++++-------------------
 5 files changed, 46 insertions(+), 74 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp
index 61809012..a626b366 100644
--- a/quaddtype/numpy_quaddtype/src/casts.cpp
+++ b/quaddtype/numpy_quaddtype/src/casts.cpp
@@ -272,15 +272,15 @@ numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta
                                   PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2],
                                   npy_intp *view_offset)
 {
-
+    // todo: here it is converting this to SLEEF, losing data and getting 0
     if (given_descrs[1] == NULL) {
+        printf("called\n");
         loop_descrs[1] = (PyArray_Descr *)new_quaddtype_instance(BACKEND_SLEEF);
         if (loop_descrs[1] == nullptr) {
             return (NPY_CASTING)-1;
         }
     }
     else {
-
         Py_INCREF(given_descrs[1]);
         loop_descrs[1] = given_descrs[1];
     }
diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c
index 839c6ab4..4c3f73dd 100644
--- a/quaddtype/numpy_quaddtype/src/dtype.c
+++ b/quaddtype/numpy_quaddtype/src/dtype.c
@@ -63,7 +63,8 @@ new_quaddtype_instance(QuadBackendType backend)
         return NULL;
     }
     new->base.elsize = (target_backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
-    new->base.alignment = (target_backend == BACKEND_SLEEF) ? _Alignof(Sleef_quad) : _Alignof(long double);
+    new->base.alignment =
+            (target_backend == BACKEND_SLEEF) ? _Alignof(Sleef_quad) : _Alignof(long double);
     new->backend = target_backend;
     return new;
 }
diff --git a/quaddtype/numpy_quaddtype/src/ops.hpp b/quaddtype/numpy_quaddtype/src/ops.hpp
index 83c7ce3c..dcbb902e 100644
--- a/quaddtype/numpy_quaddtype/src/ops.hpp
+++ b/quaddtype/numpy_quaddtype/src/ops.hpp
@@ -152,7 +152,6 @@ quad_atan(Sleef_quad *op, Sleef_quad *out)
     return 0;
 }
 
-
 // Unary long double operations
 typedef int (*unary_op_longdouble_def)(long double *, long double *);
 
diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c
index 1a3ffa3a..5b09abe2 100644
--- a/quaddtype/numpy_quaddtype/src/scalar.c
+++ b/quaddtype/numpy_quaddtype/src/scalar.c
@@ -76,8 +76,7 @@ QuadPrecision_from_object(PyObject *value, QuadBackendType backend)
             self->value.longdouble_value = (long double)val;
         }
     }
-    else 
-    {
+    else {
         PyObject *type_str = PyObject_Str((PyObject *)Py_TYPE(value));
         if (type_str != NULL) {
             const char *type_cstr = PyUnicode_AsUTF8(type_str);
@@ -132,23 +131,25 @@ QuadPrecision_new(PyTypeObject *cls, PyObject *args, PyObject *kwargs)
 static PyObject *
 QuadPrecision_str_dragon4(QuadPrecisionObject *self)
 {
-    Dragon4_Options opt = {
-        .scientific = 0,
-        .digit_mode = DigitMode_Unique,
-        .cutoff_mode = CutoffMode_TotalLength,
-        .precision = SLEEF_QUAD_DIG,
-        .sign = 1,
-        .trim_mode = TrimMode_LeaveOneZero,
-        .digits_left = 1,
-        .digits_right = SLEEF_QUAD_DIG
-    };
+    Dragon4_Options opt = {.scientific = 0,
+                           .digit_mode = DigitMode_Unique,
+                           .cutoff_mode = CutoffMode_TotalLength,
+                           .precision = SLEEF_QUAD_DIG,
+                           .sign = 1,
+                           .trim_mode = TrimMode_LeaveOneZero,
+                           .digits_left = 1,
+                           .digits_right = SLEEF_QUAD_DIG};
 
     if (self->backend == BACKEND_SLEEF) {
-        return Dragon4_Positional_QuadDType(&self->value.sleef_value, opt.digit_mode, opt.cutoff_mode, opt.precision, opt.min_digits, opt.sign, opt.trim_mode, opt.digits_left, opt.digits_right);
+        return Dragon4_Positional_QuadDType(
+                &self->value.sleef_value, opt.digit_mode, opt.cutoff_mode, opt.precision,
+                opt.min_digits, opt.sign, opt.trim_mode, opt.digits_left, opt.digits_right);
     }
     else {
         Sleef_quad sleef_val = Sleef_cast_from_doubleq1(self->value.longdouble_value);
-        return Dragon4_Positional_QuadDType(&sleef_val,  opt.digit_mode, opt.cutoff_mode, opt.precision, opt.min_digits, opt.sign, opt.trim_mode, opt.digits_left, opt.digits_right);
+        return Dragon4_Positional_QuadDType(&sleef_val, opt.digit_mode, opt.cutoff_mode,
+                                            opt.precision, opt.min_digits, opt.sign, opt.trim_mode,
+                                            opt.digits_left, opt.digits_right);
     }
 }
 
@@ -181,24 +182,26 @@ QuadPrecision_repr(QuadPrecisionObject *self)
 static PyObject *
 QuadPrecision_repr_dragon4(QuadPrecisionObject *self)
 {
-    Dragon4_Options opt = {
-        .scientific = 1,
-        .digit_mode = DigitMode_Unique,
-        .cutoff_mode = CutoffMode_TotalLength,
-        .precision = SLEEF_QUAD_DIG,
-        .sign = 1,
-        .trim_mode = TrimMode_LeaveOneZero,
-        .digits_left = 1,
-        .exp_digits = 3
-    };
+    Dragon4_Options opt = {.scientific = 1,
+                           .digit_mode = DigitMode_Unique,
+                           .cutoff_mode = CutoffMode_TotalLength,
+                           .precision = SLEEF_QUAD_DIG,
+                           .sign = 1,
+                           .trim_mode = TrimMode_LeaveOneZero,
+                           .digits_left = 1,
+                           .exp_digits = 3};
 
     PyObject *str;
     if (self->backend == BACKEND_SLEEF) {
-        str = Dragon4_Scientific_QuadDType(&self->value.sleef_value, opt.digit_mode, opt.precision, opt.min_digits, opt.sign, opt.trim_mode, opt.digits_left, opt.exp_digits);
+        str = Dragon4_Scientific_QuadDType(&self->value.sleef_value, opt.digit_mode, opt.precision,
+                                           opt.min_digits, opt.sign, opt.trim_mode, opt.digits_left,
+                                           opt.exp_digits);
     }
     else {
         Sleef_quad sleef_val = Sleef_cast_from_doubleq1(self->value.longdouble_value);
-        str = Dragon4_Scientific_QuadDType(&sleef_val, opt.digit_mode, opt.precision, opt.min_digits, opt.sign, opt.trim_mode, opt.digits_left, opt.exp_digits);
+        str = Dragon4_Scientific_QuadDType(&sleef_val, opt.digit_mode, opt.precision,
+                                           opt.min_digits, opt.sign, opt.trim_mode, opt.digits_left,
+                                           opt.exp_digits);
     }
 
     if (str == NULL) {
diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp
index 7b908134..828ace2b 100644
--- a/quaddtype/numpy_quaddtype/src/umath.cpp
+++ b/quaddtype/numpy_quaddtype/src/umath.cpp
@@ -397,11 +397,12 @@ static int
 quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
                     PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[])
 {
+    printf("called comparison promoter\n");
     int nin = ufunc->nin;
     int nargs = ufunc->nargs;
     PyArray_DTypeMeta *common = NULL;
     bool has_quad = false;
-
+    printf("dtyp1: %s dtype2: %s\n", get_dtype_name(op_dtypes[0]), get_dtype_name(op_dtypes[1]));
     // Handle the special case for reductions
     if (op_dtypes[0] == NULL) {
         assert(nin == 2 && ufunc->nout == 1); /* must be reduction */
@@ -415,6 +416,7 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
     // Check if any input or signature is QuadPrecision
     for (int i = 0; i < nin; i++) {
         if (op_dtypes[i] == &QuadPrecDType) {
+            printf("Quaddtype found at index: %d\n", i);
             has_quad = true;
         }
     }
@@ -431,7 +433,6 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
                 }
                 else if (common != signature[i]) {
                     Py_CLEAR(common);  // Not homogeneous, unset common
-
                     break;
                 }
             }
@@ -459,6 +460,7 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
         else {
             // Otherwise, use the common dtype
             Py_INCREF(common);
+            printf("setting output to %s dtype\n", get_dtype_name(common));
             new_op_dtypes[i] = common;
         }
     }
@@ -560,42 +562,9 @@ init_quad_binary_ops(PyObject *numpy)
 
 template <cmp_quad_def sleef_comp, cmp_londouble_def ld_comp>
 int
-quad_generic_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
-                                       npy_intp const dimensions[], npy_intp const strides[],
-                                       NpyAuxData *auxdata)
-{
-    npy_intp N = dimensions[0];
-    char *in1_ptr = data[0], *in2_ptr = data[1];
-    char *out_ptr = data[2];
-    npy_intp in1_stride = strides[0];
-    npy_intp in2_stride = strides[1];
-    npy_intp out_stride = strides[2];
-
-    QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];
-    QuadBackendType backend = descr->backend;
-
-    while (N--) {
-        if (backend == BACKEND_SLEEF) {
-            *((npy_bool *)out_ptr) =
-                    sleef_comp((const Sleef_quad *)in1_ptr, (const Sleef_quad *)in2_ptr);
-        }
-        else {
-            *((npy_bool *)out_ptr) =
-                    ld_comp((const long double *)in1_ptr, (const long double *)in2_ptr);
-        }
-
-        in1_ptr += in1_stride;
-        in2_ptr += in2_stride;
-        out_ptr += out_stride;
-    }
-    return 0;
-}
-
-template <cmp_quad_def sleef_comp, cmp_londouble_def ld_comp>
-int
-quad_generic_comp_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
-                                         npy_intp const dimensions[], npy_intp const strides[],
-                                         NpyAuxData *auxdata)
+quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[],
+                               npy_intp const dimensions[], npy_intp const strides[],
+                               NpyAuxData *auxdata)
 {
     npy_intp N = dimensions[0];
     char *in1_ptr = data[0], *in2_ptr = data[1];
@@ -617,6 +586,7 @@ quad_generic_comp_strided_loop_unaligned(PyArrayMethod_Context *context, char *c
             *((npy_bool *)out_ptr) = sleef_comp(&in1.sleef_value, &in2.sleef_value);
         }
         else {
+            printf("%Lf % Lf\n", in1.longdouble_value, in2.longdouble_value);
             *((npy_bool *)out_ptr) = ld_comp(&in1.longdouble_value, &in2.longdouble_value);
         }
 
@@ -632,7 +602,6 @@ comparison_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
                           PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[])
 {
     PyArray_DTypeMeta *new_signature[NPY_MAXARGS];
-
     memcpy(new_signature, signature, 3 * sizeof(PyArray_DTypeMeta *));
     new_signature[2] = NULL;
     int res = quad_ufunc_promoter(ufunc, op_dtypes, new_signature, new_op_dtypes);
@@ -654,11 +623,11 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name)
 
     PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &PyArray_BoolDType};
 
-    PyType_Slot slots[] = {{NPY_METH_strided_loop,
-                            (void *)&quad_generic_comp_strided_loop_aligned<sleef_comp, ld_comp>},
-                           {NPY_METH_unaligned_strided_loop,
-                            (void *)&quad_generic_comp_strided_loop_unaligned<sleef_comp, ld_comp>},
-                           {0, NULL}};
+    PyType_Slot slots[] = {
+            {NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop<sleef_comp, ld_comp>},
+            {NPY_METH_unaligned_strided_loop,
+             (void *)&quad_generic_comp_strided_loop<sleef_comp, ld_comp>},
+            {0, NULL}};
 
     PyArrayMethod_Spec Spec = {
             .name = "quad_comp",

From 707d5d6123e59d3c2a1631998e8cdb447ec06724 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Wed, 18 Sep 2024 13:47:32 +0530
Subject: [PATCH 20/32] fixed longdouble comparison casting issue

---
 quaddtype/numpy_quaddtype/src/casts.cpp |  1 -
 quaddtype/numpy_quaddtype/src/umath.cpp | 62 +++++++++++++++++++++----
 2 files changed, 54 insertions(+), 9 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp
index a626b366..aa38e476 100644
--- a/quaddtype/numpy_quaddtype/src/casts.cpp
+++ b/quaddtype/numpy_quaddtype/src/casts.cpp
@@ -274,7 +274,6 @@ numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta
 {
     // todo: here it is converting this to SLEEF, losing data and getting 0
     if (given_descrs[1] == NULL) {
-        printf("called\n");
         loop_descrs[1] = (PyArray_Descr *)new_quaddtype_instance(BACKEND_SLEEF);
         if (loop_descrs[1] == nullptr) {
             return (NPY_CASTING)-1;
diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp
index 828ace2b..4e7fe949 100644
--- a/quaddtype/numpy_quaddtype/src/umath.cpp
+++ b/quaddtype/numpy_quaddtype/src/umath.cpp
@@ -282,6 +282,7 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
     // Determine target backend and if casting is needed
     NPY_CASTING casting = NPY_NO_CASTING;
     if (descr_in1->backend != descr_in2->backend) {
+        
         target_backend = BACKEND_LONGDOUBLE;
         casting = NPY_SAFE_CASTING;
     }
@@ -397,12 +398,12 @@ static int
 quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
                     PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[])
 {
-    printf("called comparison promoter\n");
+    
     int nin = ufunc->nin;
     int nargs = ufunc->nargs;
     PyArray_DTypeMeta *common = NULL;
     bool has_quad = false;
-    printf("dtyp1: %s dtype2: %s\n", get_dtype_name(op_dtypes[0]), get_dtype_name(op_dtypes[1]));
+    
     // Handle the special case for reductions
     if (op_dtypes[0] == NULL) {
         assert(nin == 2 && ufunc->nout == 1); /* must be reduction */
@@ -416,7 +417,7 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
     // Check if any input or signature is QuadPrecision
     for (int i = 0; i < nin; i++) {
         if (op_dtypes[i] == &QuadPrecDType) {
-            printf("Quaddtype found at index: %d\n", i);
+            
             has_quad = true;
         }
     }
@@ -460,7 +461,7 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
         else {
             // Otherwise, use the common dtype
             Py_INCREF(common);
-            printf("setting output to %s dtype\n", get_dtype_name(common));
+            
             new_op_dtypes[i] = common;
         }
     }
@@ -560,6 +561,47 @@ init_quad_binary_ops(PyObject *numpy)
 
 // comparison functions
 
+// Resolve loop descriptors for quad comparisons: inputs share one backend, output is bool.
+static NPY_CASTING
+quad_comparison_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[],
+                                   PyArray_Descr *const given_descrs[],
+                                   PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED(view_offset))
+{
+    QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0];
+    QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1];
+    // Mixed backends: run the loop on the longdouble backend and report a safe cast.
+    bool mixed = descr_in1->backend != descr_in2->backend;
+    QuadBackendType target_backend = mixed ? BACKEND_LONGDOUBLE : descr_in1->backend;
+    NPY_CASTING casting = mixed ? NPY_SAFE_CASTING : NPY_NO_CASTING;
+
+    // Inputs: reuse the given descriptor when its backend matches, else create a new one.
+    for (int i = 0; i < 2; i++) {
+        if (((QuadPrecDTypeObject *)given_descrs[i])->backend != target_backend) {
+            loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance(target_backend);
+            if (!loop_descrs[i]) {
+                // Do not leak the reference already stored for the first input.
+                if (i == 1) {
+                    Py_DECREF(loop_descrs[0]);
+                }
+                return (NPY_CASTING)-1;
+            }
+        }
+        else {
+            Py_INCREF(given_descrs[i]);
+            loop_descrs[i] = given_descrs[i];
+        }
+    }
+
+    // Output: the descriptor for NPY_BOOL; on failure release both input references.
+    loop_descrs[2] = PyArray_DescrFromType(NPY_BOOL);
+    if (!loop_descrs[2]) {
+        Py_DECREF(loop_descrs[0]);
+        Py_DECREF(loop_descrs[1]);
+        return (NPY_CASTING)-1;
+    }
+    return casting;
+}
+
 template <cmp_quad_def sleef_comp, cmp_londouble_def ld_comp>
 int
 quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[],
@@ -581,15 +623,18 @@ quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[
     while (N--) {
         memcpy(&in1, in1_ptr, elem_size);
         memcpy(&in2, in2_ptr, elem_size);
+        npy_bool result;
 
         if (backend == BACKEND_SLEEF) {
-            *((npy_bool *)out_ptr) = sleef_comp(&in1.sleef_value, &in2.sleef_value);
+           result = sleef_comp(&in1.sleef_value, &in2.sleef_value);
         }
         else {
-            printf("%Lf % Lf\n", in1.longdouble_value, in2.longdouble_value);
-            *((npy_bool *)out_ptr) = ld_comp(&in1.longdouble_value, &in2.longdouble_value);
+            
+            result = ld_comp(&in1.longdouble_value, &in2.longdouble_value);
         }
 
+        *((npy_bool *)out_ptr) = result;
+
         in1_ptr += in1_stride;
         in2_ptr += in2_stride;
         out_ptr += out_stride;
@@ -624,6 +669,7 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name)
     PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &PyArray_BoolDType};
 
     PyType_Slot slots[] = {
+            {NPY_METH_resolve_descriptors, (void *)&quad_comparison_op_resolve_descriptors},
             {NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop<sleef_comp, ld_comp>},
             {NPY_METH_unaligned_strided_loop,
              (void *)&quad_generic_comp_strided_loop<sleef_comp, ld_comp>},
@@ -633,7 +679,7 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name)
             .name = "quad_comp",
             .nin = 2,
             .nout = 1,
-            .casting = NPY_NO_CASTING,
+            .casting = NPY_SAFE_CASTING,
             .flags = NPY_METH_SUPPORTS_UNALIGNED,
             .dtypes = dtypes,
             .slots = slots,

From 0a75b9123e2e884fb7b150264088b08f122c7688 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Wed, 18 Sep 2024 15:39:46 +0530
Subject: [PATCH 21/32] aligned and unaligned comparison loops

---
 quaddtype/README.md                     |  9 +++--
 quaddtype/numpy_quaddtype/src/umath.cpp | 48 +++++++++++++++++++++++--
 2 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/quaddtype/README.md b/quaddtype/README.md
index 97425387..60693cb9 100644
--- a/quaddtype/README.md
+++ b/quaddtype/README.md
@@ -9,9 +9,14 @@ pip install -i https://test.pypi.org/simple/ quaddtype
 
 ## Usage
 
-```
+```python
 import numpy as np
-from quaddtype import QuadPrecDType, QuadPrecision
+from numpy_quaddtype import QuadPrecDType, QuadPrecision
 
+# using sleef backend (default)
 np.array([1,2,3], dtype=QuadPrecDType())
+np.array([1,2,3], dtype=QuadPrecDType("sleef"))
+
+# using longdouble backend
+np.array([1,2,3], dtype=QuadPrecDType("longdouble"))
 ```
diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp
index 4e7fe949..587c0e93 100644
--- a/quaddtype/numpy_quaddtype/src/umath.cpp
+++ b/quaddtype/numpy_quaddtype/src/umath.cpp
@@ -633,7 +633,51 @@ quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[
             result = ld_comp(&in1.longdouble_value, &in2.longdouble_value);
         }
 
-        *((npy_bool *)out_ptr) = result;
+        memcpy(out_ptr, &result, sizeof(npy_bool));
+
+        in1_ptr += in1_stride;
+        in2_ptr += in2_stride;
+        out_ptr += out_stride;
+    }
+    return 0;
+}
+
+
+template <cmp_quad_def sleef_comp, cmp_londouble_def ld_comp>
+int
+quad_generic_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
+                               npy_intp const dimensions[], npy_intp const strides[],
+                               NpyAuxData *auxdata)
+{
+    npy_intp N = dimensions[0];
+    char *in1_ptr = data[0], *in2_ptr = data[1];
+    char *out_ptr = data[2];
+    npy_intp in1_stride = strides[0];
+    npy_intp in2_stride = strides[1];
+    npy_intp out_stride = strides[2];
+
+    QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];
+    QuadBackendType backend = descr->backend;
+    size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
+
+    quad_value in1, in2;
+    while (N--) 
+    {
+        quad_value in1 = *(quad_value *)in1_ptr;
+        quad_value in2 = *(quad_value *)in2_ptr;
+
+        npy_bool result;
+
+        if (backend == BACKEND_SLEEF) 
+        {
+            result = sleef_comp(&in1.sleef_value, &in2.sleef_value);
+        } 
+        else 
+        {
+            result = ld_comp(&in1.longdouble_value, &in2.longdouble_value);
+        }
+
+        *(npy_bool *)out_ptr = result;
 
         in1_ptr += in1_stride;
         in2_ptr += in2_stride;
@@ -670,7 +714,7 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name)
 
     PyType_Slot slots[] = {
             {NPY_METH_resolve_descriptors, (void *)&quad_comparison_op_resolve_descriptors},
-            {NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop<sleef_comp, ld_comp>},
+            {NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop_aligned<sleef_comp, ld_comp>},
             {NPY_METH_unaligned_strided_loop,
              (void *)&quad_generic_comp_strided_loop<sleef_comp, ld_comp>},
             {0, NULL}};

From 0052e160b42dfb2f02728d2d1df91d901ac4f7d7 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Thu, 19 Sep 2024 13:17:01 +0530
Subject: [PATCH 22/32] Creating Quad with Quad

---
 quaddtype/numpy_quaddtype/src/scalar.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c
index 5b09abe2..0dcf5f57 100644
--- a/quaddtype/numpy_quaddtype/src/scalar.c
+++ b/quaddtype/numpy_quaddtype/src/scalar.c
@@ -76,6 +76,19 @@ QuadPrecision_from_object(PyObject *value, QuadBackendType backend)
             self->value.longdouble_value = (long double)val;
         }
     }
+    else if (Py_TYPE(value) == &QuadPrecision_Type)
+    {
+        // todo: not working for ld backend, getting garbage value not sure why?
+        QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)value;
+        // printf("%d %d\n", quad_obj->backend, backend);
+        // printf("%Lf\n", quad_obj->value.longdouble_value);
+        if (quad_obj->backend == BACKEND_SLEEF) {
+            self->value.sleef_value = quad_obj->value.sleef_value;
+        }
+        else {
+            self->value.longdouble_value = quad_obj->value.longdouble_value;
+        }
+    }
     else {
         PyObject *type_str = PyObject_Str((PyObject *)Py_TYPE(value));
         if (type_str != NULL) {

From 082b64b1a25baa5a2e36c244459f2016819b3e5d Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Thu, 19 Sep 2024 21:43:48 +0530
Subject: [PATCH 23/32] exposing constants

---
 quaddtype/numpy_quaddtype/__init__.py  |  6 +++++-
 quaddtype/numpy_quaddtype/src/scalar.c | 22 ++++++++++++++++++++++
 quaddtype/numpy_quaddtype/src/scalar.h |  3 +++
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/quaddtype/numpy_quaddtype/__init__.py b/quaddtype/numpy_quaddtype/__init__.py
index 5aa50441..4ea5a2aa 100644
--- a/quaddtype/numpy_quaddtype/__init__.py
+++ b/quaddtype/numpy_quaddtype/__init__.py
@@ -5,7 +5,11 @@
 )
 
 __all__ = ['QuadPrecision', 'QuadPrecDType', 'SleefQuadPrecision', 'LongDoubleQuadPrecision',
-           'SleefQuadPrecDType', 'LongDoubleQuadPrecDType', 'is_longdouble_128']
+           'SleefQuadPrecDType', 'LongDoubleQuadPrecDType', 'is_longdouble_128', 'pi', 'e']
+
+
+pi = QuadPrecision(0).pi  # getset attributes need an instance; on the class it is a descriptor
+e = QuadPrecision(0).e
 
 
 def SleefQuadPrecision(value):
diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c
index 0dcf5f57..84f545ae 100644
--- a/quaddtype/numpy_quaddtype/src/scalar.c
+++ b/quaddtype/numpy_quaddtype/src/scalar.c
@@ -233,6 +233,27 @@ QuadPrecision_dealloc(QuadPrecisionObject *self)
     Py_TYPE(self)->tp_free((PyObject *)self);
 }
 
+PyObject *QuadPrecision_get_pi(PyObject *self, void *closure) {
+    // Getter for the `pi` attribute: a new SLEEF-backend scalar holding SLEEF_M_PIq.
+    QuadPrecisionObject *pi_obj = QuadPrecision_raw_new(BACKEND_SLEEF);
+    if (pi_obj != NULL) pi_obj->value.sleef_value = SLEEF_M_PIq;
+    return (PyObject *)pi_obj;  // NULL when QuadPrecision_raw_new returned NULL
+}
+
+PyObject *QuadPrecision_get_e(PyObject *self, void *closure) {
+    // Getter for the `e` attribute: a new SLEEF-backend scalar holding SLEEF_M_Eq.
+    QuadPrecisionObject *e_obj = QuadPrecision_raw_new(BACKEND_SLEEF);
+    if (e_obj != NULL) e_obj->value.sleef_value = SLEEF_M_Eq;
+    return (PyObject *)e_obj;  // NULL when QuadPrecision_raw_new returned NULL
+}
+
+// Attribute getters (setter slot is NULL) installed on QuadPrecision_Type via tp_getset
+static PyGetSetDef QuadPrecision_getset[] = {
+    {"pi", (getter)QuadPrecision_get_pi, NULL, "Pi constant", NULL},
+    {"e", (getter)QuadPrecision_get_e, NULL, "Euler's number", NULL},
+    {NULL}  /* Sentinel */
+};
+
 PyTypeObject QuadPrecision_Type = {
         PyVarObject_HEAD_INIT(NULL, 0).tp_name = "numpy_quaddtype.QuadPrecision",
         .tp_basicsize = sizeof(QuadPrecisionObject),
@@ -243,6 +264,7 @@ PyTypeObject QuadPrecision_Type = {
         .tp_str = (reprfunc)QuadPrecision_str_dragon4,
         .tp_as_number = &quad_as_scalar,
         .tp_richcompare = (richcmpfunc)quad_richcompare,
+        .tp_getset = QuadPrecision_getset,
 };
 
 int
diff --git a/quaddtype/numpy_quaddtype/src/scalar.h b/quaddtype/numpy_quaddtype/src/scalar.h
index 4fac1adf..2faf85c9 100644
--- a/quaddtype/numpy_quaddtype/src/scalar.h
+++ b/quaddtype/numpy_quaddtype/src/scalar.h
@@ -31,6 +31,9 @@ QuadPrecision_from_object(PyObject *value, QuadBackendType backend);
 int
 init_quadprecision_scalar(void);
 
+PyObject* QuadPrecision_get_pi(PyObject* self, void* closure);
+PyObject* QuadPrecision_get_e(PyObject* self, void* closure);
+
 #define PyArray_IsScalar(obj, QuadPrecDType) PyObject_TypeCheck(obj, &QuadPrecision_Type)
 #define PyArrayScalar_VAL(obj, QuadPrecDType) (((QuadPrecisionObject *)obj)->value)
 

From e53edbadb1baf4972259028d2e10a059d2fc1d01 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Fri, 20 Sep 2024 17:03:24 +0530
Subject: [PATCH 24/32] resolved reviews

---
 quaddtype/numpy_quaddtype/src/dragon4.c |  5 +++++
 quaddtype/numpy_quaddtype/src/dtype.c   | 11 +++++++---
 quaddtype/numpy_quaddtype/src/scalar.c  | 29 ++++++++++++++-----------
 3 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/src/dragon4.c b/quaddtype/numpy_quaddtype/src/dragon4.c
index 1ac9420b..34ad4cbb 100644
--- a/quaddtype/numpy_quaddtype/src/dragon4.c
+++ b/quaddtype/numpy_quaddtype/src/dragon4.c
@@ -1,3 +1,8 @@
+/*
+This code was extracted from NumPy and the original author was Allan Haldane(@ahaldane)
+Modifications are specific to support the SLEEF_QUAD
+*/
+
 #include <numpy/npy_common.h>
 #include <math.h>
 #include <stdio.h>
diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c
index 4c3f73dd..7d0fad37 100644
--- a/quaddtype/numpy_quaddtype/src/dtype.c
+++ b/quaddtype/numpy_quaddtype/src/dtype.c
@@ -79,10 +79,15 @@ ensure_canonical(QuadPrecDTypeObject *self)
 static QuadPrecDTypeObject *
 common_instance(QuadPrecDTypeObject *dtype1, QuadPrecDTypeObject *dtype2)
 {
+    // if backend mismatch then return SLEEF one (safe to cast ld to quad)
     if (dtype1->backend != dtype2->backend) {
-        PyErr_SetString(PyExc_TypeError,
-                        "Cannot find common instance for QuadPrecDTypes with different backends");
-        return NULL;
+        if (dtype1->backend == BACKEND_SLEEF) {
+            Py_INCREF(dtype1);
+            return dtype1;
+        }
+
+        Py_INCREF(dtype2);
+        return dtype2;
     }
     Py_INCREF(dtype1);
     return dtype1;
diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c
index 0dcf5f57..c6bac92f 100644
--- a/quaddtype/numpy_quaddtype/src/scalar.c
+++ b/quaddtype/numpy_quaddtype/src/scalar.c
@@ -76,39 +76,42 @@ QuadPrecision_from_object(PyObject *value, QuadBackendType backend)
             self->value.longdouble_value = (long double)val;
         }
     }
-    else if (Py_TYPE(value) == &QuadPrecision_Type)
-    {
-        // todo: not working for ld backend, getting garbage value not sure why?
+    else if (Py_TYPE(value) == &QuadPrecision_Type) {
+        Py_DECREF(self);  // discard the default one
         QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)value;
-        // printf("%d %d\n", quad_obj->backend, backend);
-        // printf("%Lf\n", quad_obj->value.longdouble_value);
+
+        // create a new one with the same backend
+        QuadPrecisionObject *self = QuadPrecision_raw_new(quad_obj->backend);
         if (quad_obj->backend == BACKEND_SLEEF) {
             self->value.sleef_value = quad_obj->value.sleef_value;
         }
         else {
             self->value.longdouble_value = quad_obj->value.longdouble_value;
         }
+
+        return self;
     }
     else {
         PyObject *type_str = PyObject_Str((PyObject *)Py_TYPE(value));
         if (type_str != NULL) {
             const char *type_cstr = PyUnicode_AsUTF8(type_str);
             if (type_cstr != NULL) {
-                PyErr_Format(
-                        PyExc_TypeError,
-                        "QuadPrecision value must be a float, int or string, but got %s instead",
-                        type_cstr);
+                PyErr_Format(PyExc_TypeError,
+                             "QuadPrecision value must be a quad, float, int or string, but got %s "
+                             "instead",
+                             type_cstr);
             }
             else {
-                PyErr_SetString(PyExc_TypeError,
-                                "QuadPrecision value must be a float, int or string, but got an "
-                                "unknown type instead");
+                PyErr_SetString(
+                        PyExc_TypeError,
+                        "QuadPrecision value must be a quad, float, int or string, but got an "
+                        "unknown type instead");
             }
             Py_DECREF(type_str);
         }
         else {
             PyErr_SetString(PyExc_TypeError,
-                            "QuadPrecision value must be a float, int or string, but got an "
+                            "QuadPrecision value must be a quad, float, int or string, but got an "
                             "unknown type instead");
         }
         Py_DECREF(self);

From bbc99a61d105a0177130552f8ac8427985751baa Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Fri, 20 Sep 2024 19:27:55 +0530
Subject: [PATCH 25/32] fixing constant error

---
 quaddtype/numpy_quaddtype/__init__.py         | 19 ++++++----
 .../numpy_quaddtype/src/quaddtype_main.c      | 15 ++++++++
 quaddtype/numpy_quaddtype/src/scalar.c        | 37 ++++++++++++++++++-
 quaddtype/numpy_quaddtype/src/scalar.h        | 18 +++++++++
 quaddtype/numpy_quaddtype/src/umath.cpp       |  3 --
 5 files changed, 80 insertions(+), 12 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/__init__.py b/quaddtype/numpy_quaddtype/__init__.py
index 5aa50441..294d7248 100644
--- a/quaddtype/numpy_quaddtype/__init__.py
+++ b/quaddtype/numpy_quaddtype/__init__.py
@@ -1,24 +1,27 @@
 from ._quaddtype_main import (
     QuadPrecision,
     QuadPrecDType,
-    is_longdouble_128
+    is_longdouble_128,
+    pi, e, log2e, log10e, ln2, ln10,
+    sqrt2, sqrt3, egamma, phi, quad_max, quad_min, quad_epsilon, quad_denorm_min
 )
 
-__all__ = ['QuadPrecision', 'QuadPrecDType', 'SleefQuadPrecision', 'LongDoubleQuadPrecision',
-           'SleefQuadPrecDType', 'LongDoubleQuadPrecDType', 'is_longdouble_128']
-
+__all__ = [
+    'QuadPrecision', 'QuadPrecDType', 'SleefQuadPrecision', 'LongDoubleQuadPrecision',
+    'SleefQuadPrecDType', 'LongDoubleQuadPrecDType', 'is_longdouble_128',
+    'pi', 'e', 'log2e', 'log10e', 'ln2', 'ln10',
+    'sqrt2', 'sqrt3', 'egamma', 'phi',
+    'quad_max', 'quad_min', 'quad_epsilon', 'quad_denorm_min'
+]
 
 def SleefQuadPrecision(value):
     return QuadPrecision(value, backend='sleef')
 
-
 def LongDoubleQuadPrecision(value):
     return QuadPrecision(value, backend='longdouble')
 
-
 def SleefQuadPrecDType():
     return QuadPrecDType(backend='sleef')
 
-
 def LongDoubleQuadPrecDType():
-    return QuadPrecDType(backend='longdouble')
+    return QuadPrecDType(backend='longdouble')
\ No newline at end of file
diff --git a/quaddtype/numpy_quaddtype/src/quaddtype_main.c b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
index 7c89d57f..d5f0c729 100644
--- a/quaddtype/numpy_quaddtype/src/quaddtype_main.c
+++ b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
@@ -65,6 +65,21 @@ PyInit__quaddtype_main(void)
         goto error;
     }
 
+    if (PyModule_AddObject(m, "pi", (PyObject *)QuadPrecision_pi) < 0) goto error;
+    if (PyModule_AddObject(m, "e", (PyObject *)QuadPrecision_e) < 0) goto error;
+    if (PyModule_AddObject(m, "log2e", (PyObject *)QuadPrecision_log2e) < 0) goto error;
+    if (PyModule_AddObject(m, "log10e", (PyObject *)QuadPrecision_log10e) < 0) goto error;
+    if (PyModule_AddObject(m, "ln2", (PyObject *)QuadPrecision_ln2) < 0) goto error;
+    if (PyModule_AddObject(m, "ln10", (PyObject *)QuadPrecision_ln10) < 0) goto error;
+    if (PyModule_AddObject(m, "sqrt2", (PyObject *)QuadPrecision_sqrt2) < 0) goto error;
+    if (PyModule_AddObject(m, "sqrt3", (PyObject *)QuadPrecision_sqrt3) < 0) goto error;
+    if (PyModule_AddObject(m, "egamma", (PyObject *)QuadPrecision_egamma) < 0) goto error;
+    if (PyModule_AddObject(m, "phi", (PyObject *)QuadPrecision_phi) < 0) goto error;
+    if (PyModule_AddObject(m, "quad_max", (PyObject *)QuadPrecision_quad_max) < 0) goto error;
+    if (PyModule_AddObject(m, "quad_min", (PyObject *)QuadPrecision_quad_min) < 0) goto error;
+    if (PyModule_AddObject(m, "quad_epsilon", (PyObject *)QuadPrecision_quad_epsilon) < 0) goto error;
+    if (PyModule_AddObject(m, "quad_denorm_min", (PyObject *)QuadPrecision_quad_denorm_min) < 0) goto error;
+
     return m;
 
 error:
diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c
index c6bac92f..cafa1d01 100644
--- a/quaddtype/numpy_quaddtype/src/scalar.c
+++ b/quaddtype/numpy_quaddtype/src/scalar.c
@@ -248,9 +248,44 @@ PyTypeObject QuadPrecision_Type = {
         .tp_richcompare = (richcmpfunc)quad_richcompare,
 };
 
+QuadPrecisionObject* initialize_constants(const Sleef_quad value, QuadBackendType backend)
+{
+    QuadPrecisionObject * obj = QuadPrecision_raw_new(backend);
+    if (backend == BACKEND_SLEEF) {
+        obj->value.sleef_value = value;
+    }
+    else {
+        obj->value.longdouble_value = Sleef_cast_to_doubleq1(value);
+    }
+
+    return obj;
+}
+
 int
 init_quadprecision_scalar(void)
 {
-    // QuadPrecision_Type.tp_base = &PyFloatingArrType_Type;
+    QuadPrecisionObject* QuadPrecision_pi = initialize_constants(SLEEF_M_PIq, BACKEND_SLEEF);
+    QuadPrecisionObject* QuadPrecision_e = initialize_constants(SLEEF_M_Eq, BACKEND_SLEEF);
+    QuadPrecisionObject* QuadPrecision_log2e = initialize_constants(SLEEF_M_LOG2Eq, BACKEND_SLEEF);
+    QuadPrecisionObject* QuadPrecision_log10e = initialize_constants(SLEEF_M_LOG10Eq, BACKEND_SLEEF);
+    QuadPrecisionObject* QuadPrecision_ln2 = initialize_constants(SLEEF_M_LN2q, BACKEND_SLEEF);
+    QuadPrecisionObject* QuadPrecision_ln10 = initialize_constants(SLEEF_M_LN10q, BACKEND_SLEEF);
+    QuadPrecisionObject* QuadPrecision_sqrt2 = initialize_constants(SLEEF_M_SQRT2q, BACKEND_SLEEF);
+    QuadPrecisionObject* QuadPrecision_sqrt3 = initialize_constants(SLEEF_M_SQRT3q, BACKEND_SLEEF);
+    QuadPrecisionObject* QuadPrecision_egamma = initialize_constants(SLEEF_M_EGAMMAq, BACKEND_SLEEF);
+    QuadPrecisionObject* QuadPrecision_phi = initialize_constants(SLEEF_M_PHIq, BACKEND_SLEEF);
+    QuadPrecisionObject* QuadPrecision_quad_max = initialize_constants(SLEEF_QUAD_MAX, BACKEND_SLEEF);
+    QuadPrecisionObject* QuadPrecision_quad_min = initialize_constants(SLEEF_QUAD_MIN, BACKEND_SLEEF);
+    QuadPrecisionObject* QuadPrecision_quad_epsilon = initialize_constants(SLEEF_QUAD_EPSILON, BACKEND_SLEEF);
+    QuadPrecisionObject* QuadPrecision_quad_denorm_min = initialize_constants(SLEEF_QUAD_DENORM_MIN, BACKEND_SLEEF);
+
+    if (!QuadPrecision_pi || !QuadPrecision_e || !QuadPrecision_log2e || !QuadPrecision_log10e || 
+        !QuadPrecision_ln2 || !QuadPrecision_ln10|| !QuadPrecision_sqrt2 || !QuadPrecision_sqrt3 || 
+        !QuadPrecision_egamma || !QuadPrecision_phi || !QuadPrecision_quad_max || !QuadPrecision_quad_min ||
+        !QuadPrecision_quad_epsilon || !QuadPrecision_quad_denorm_min) {
+        PyErr_SetString(PyExc_RuntimeError, "Failed to initialize QuadPrecision constants");
+        return -1;
+    }
+
     return PyType_Ready(&QuadPrecision_Type);
 }
\ No newline at end of file
diff --git a/quaddtype/numpy_quaddtype/src/scalar.h b/quaddtype/numpy_quaddtype/src/scalar.h
index 4fac1adf..b2cb3b58 100644
--- a/quaddtype/numpy_quaddtype/src/scalar.h
+++ b/quaddtype/numpy_quaddtype/src/scalar.h
@@ -34,6 +34,24 @@ init_quadprecision_scalar(void);
 #define PyArray_IsScalar(obj, QuadPrecDType) PyObject_TypeCheck(obj, &QuadPrecision_Type)
 #define PyArrayScalar_VAL(obj, QuadPrecDType) (((QuadPrecisionObject *)obj)->value)
 
+QuadPrecisionObject* initialize_constants(const Sleef_quad value, QuadBackendType backend);
+
+// constant objects
+extern QuadPrecisionObject *QuadPrecision_pi;
+extern QuadPrecisionObject *QuadPrecision_e;
+extern QuadPrecisionObject *QuadPrecision_log2e;
+extern QuadPrecisionObject *QuadPrecision_log10e;
+extern QuadPrecisionObject *QuadPrecision_ln2;
+extern QuadPrecisionObject *QuadPrecision_ln10;
+extern QuadPrecisionObject *QuadPrecision_sqrt2;
+extern QuadPrecisionObject *QuadPrecision_sqrt3;
+extern QuadPrecisionObject *QuadPrecision_egamma;
+extern QuadPrecisionObject *QuadPrecision_phi;
+extern QuadPrecisionObject *QuadPrecision_quad_max;
+extern QuadPrecisionObject *QuadPrecision_quad_min;
+extern QuadPrecisionObject *QuadPrecision_quad_epsilon;
+extern QuadPrecisionObject *QuadPrecision_quad_denorm_min;
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp
index 587c0e93..7271443e 100644
--- a/quaddtype/numpy_quaddtype/src/umath.cpp
+++ b/quaddtype/numpy_quaddtype/src/umath.cpp
@@ -658,9 +658,6 @@ quad_generic_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *con
 
     QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];
     QuadBackendType backend = descr->backend;
-    size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
-
-    quad_value in1, in2;
     while (N--) 
     {
         quad_value in1 = *(quad_value *)in1_ptr;

From 41ca3f981c4081ddfa246640862f28a5e12bea18 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Fri, 20 Sep 2024 20:58:08 +0530
Subject: [PATCH 26/32] added 128-bit common constants

---
 quaddtype/numpy_quaddtype/__init__.py         | 21 ++++---
 .../numpy_quaddtype/src/quaddtype_main.c      | 58 ++++++++++++++-----
 quaddtype/numpy_quaddtype/src/scalar.c        | 58 -------------------
 quaddtype/numpy_quaddtype/src/scalar.h        | 18 ------
 4 files changed, 57 insertions(+), 98 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/__init__.py b/quaddtype/numpy_quaddtype/__init__.py
index 294d7248..e469a4c1 100644
--- a/quaddtype/numpy_quaddtype/__init__.py
+++ b/quaddtype/numpy_quaddtype/__init__.py
@@ -2,16 +2,13 @@
     QuadPrecision,
     QuadPrecDType,
     is_longdouble_128,
-    pi, e, log2e, log10e, ln2, ln10,
-    sqrt2, sqrt3, egamma, phi, quad_max, quad_min, quad_epsilon, quad_denorm_min
+    get_sleef_constant
 )
 
 __all__ = [
     'QuadPrecision', 'QuadPrecDType', 'SleefQuadPrecision', 'LongDoubleQuadPrecision',
-    'SleefQuadPrecDType', 'LongDoubleQuadPrecDType', 'is_longdouble_128',
-    'pi', 'e', 'log2e', 'log10e', 'ln2', 'ln10',
-    'sqrt2', 'sqrt3', 'egamma', 'phi',
-    'quad_max', 'quad_min', 'quad_epsilon', 'quad_denorm_min'
+    'SleefQuadPrecDType', 'LongDoubleQuadPrecDType', 'is_longdouble_128', 'pi', 'e', 
+    'log2e', 'log10e', 'ln2', 'ln10', 'max_value', 'min_value', 'epsilon'
 ]
 
 def SleefQuadPrecision(value):
@@ -24,4 +21,14 @@ def SleefQuadPrecDType():
     return QuadPrecDType(backend='sleef')
 
 def LongDoubleQuadPrecDType():
-    return QuadPrecDType(backend='longdouble')
\ No newline at end of file
+    return QuadPrecDType(backend='longdouble')
+
+pi = get_sleef_constant("pi")
+e = get_sleef_constant("e")
+log2e = get_sleef_constant("log2e")
+log10e = get_sleef_constant("log10e")
+ln2 = get_sleef_constant("ln2")
+ln10 = get_sleef_constant("ln10")
+max_value = get_sleef_constant("quad_max")
+min_value = get_sleef_constant("quad_min")
+epsilon = get_sleef_constant("epsilon")
\ No newline at end of file
diff --git a/quaddtype/numpy_quaddtype/src/quaddtype_main.c b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
index d5f0c729..f2935299 100644
--- a/quaddtype/numpy_quaddtype/src/quaddtype_main.c
+++ b/quaddtype/numpy_quaddtype/src/quaddtype_main.c
@@ -1,4 +1,7 @@
 #include <Python.h>
+#include <sleef.h>
+#include <sleefquad.h>
+#include <string.h>
 
 #define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API
 #define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API
@@ -26,8 +29,48 @@ static PyObject* py_is_longdouble_128(PyObject* self, PyObject* args) {
     }
 }
 
+static PyObject* get_sleef_constant(PyObject* self, PyObject* args) {
+    const char* constant_name;
+    if (!PyArg_ParseTuple(args, "s", &constant_name)) {
+        return NULL;
+    }
+
+    QuadPrecisionObject* result = QuadPrecision_raw_new(BACKEND_SLEEF);
+    if (result == NULL) {
+        return NULL;
+    }
+
+    if (strcmp(constant_name, "pi") == 0) {
+        result->value.sleef_value = SLEEF_M_PIq;
+    } else if (strcmp(constant_name, "e") == 0) {
+        result->value.sleef_value = SLEEF_M_Eq;
+    } else if (strcmp(constant_name, "log2e") == 0) {
+        result->value.sleef_value = SLEEF_M_LOG2Eq;
+    } else if (strcmp(constant_name, "log10e") == 0) {
+        result->value.sleef_value = SLEEF_M_LOG10Eq;
+    } else if (strcmp(constant_name, "ln2") == 0) {
+        result->value.sleef_value = SLEEF_M_LN2q;
+    } else if (strcmp(constant_name, "ln10") == 0) {
+        result->value.sleef_value = SLEEF_M_LN10q;
+    } else if (strcmp(constant_name, "quad_max") == 0) {
+        result->value.sleef_value = SLEEF_QUAD_MAX;
+    } else if (strcmp(constant_name, "quad_min") == 0) {
+        result->value.sleef_value = SLEEF_QUAD_MIN;
+    } else if (strcmp(constant_name, "epsilon") == 0) {
+        result->value.sleef_value = SLEEF_QUAD_EPSILON;
+    }
+    else {
+        PyErr_SetString(PyExc_ValueError, "Unknown constant name");
+        Py_DECREF(result);
+        return NULL;
+    }
+
+    return (PyObject*)result;
+}
+
 static PyMethodDef module_methods[] = {
     {"is_longdouble_128", py_is_longdouble_128, METH_NOARGS, "Check if long double is 128-bit"},
+    {"get_sleef_constant", get_sleef_constant, METH_VARARGS, "Get Sleef constant by name"},
     {NULL, NULL, 0, NULL} 
 };
 
@@ -65,21 +108,6 @@ PyInit__quaddtype_main(void)
         goto error;
     }
 
-    if (PyModule_AddObject(m, "pi", (PyObject *)QuadPrecision_pi) < 0) goto error;
-    if (PyModule_AddObject(m, "e", (PyObject *)QuadPrecision_e) < 0) goto error;
-    if (PyModule_AddObject(m, "log2e", (PyObject *)QuadPrecision_log2e) < 0) goto error;
-    if (PyModule_AddObject(m, "log10e", (PyObject *)QuadPrecision_log10e) < 0) goto error;
-    if (PyModule_AddObject(m, "ln2", (PyObject *)QuadPrecision_ln2) < 0) goto error;
-    if (PyModule_AddObject(m, "ln10", (PyObject *)QuadPrecision_ln10) < 0) goto error;
-    if (PyModule_AddObject(m, "sqrt2", (PyObject *)QuadPrecision_sqrt2) < 0) goto error;
-    if (PyModule_AddObject(m, "sqrt3", (PyObject *)QuadPrecision_sqrt3) < 0) goto error;
-    if (PyModule_AddObject(m, "egamma", (PyObject *)QuadPrecision_egamma) < 0) goto error;
-    if (PyModule_AddObject(m, "phi", (PyObject *)QuadPrecision_phi) < 0) goto error;
-    if (PyModule_AddObject(m, "quad_max", (PyObject *)QuadPrecision_quad_max) < 0) goto error;
-    if (PyModule_AddObject(m, "quad_min", (PyObject *)QuadPrecision_quad_min) < 0) goto error;
-    if (PyModule_AddObject(m, "quad_epsilon", (PyObject *)QuadPrecision_quad_epsilon) < 0) goto error;
-    if (PyModule_AddObject(m, "quad_denorm_min", (PyObject *)QuadPrecision_quad_denorm_min) < 0) goto error;
-
     return m;
 
 error:
diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c
index dc5ff623..c65e0889 100644
--- a/quaddtype/numpy_quaddtype/src/scalar.c
+++ b/quaddtype/numpy_quaddtype/src/scalar.c
@@ -236,27 +236,6 @@ QuadPrecision_dealloc(QuadPrecisionObject *self)
     Py_TYPE(self)->tp_free((PyObject *)self);
 }
 
-PyObject* QuadPrecision_get_pi(PyObject* self, void* closure) {
-    QuadPrecisionObject* new = QuadPrecision_raw_new(BACKEND_SLEEF);
-    if (new == NULL) return NULL;
-    new->value.sleef_value = SLEEF_M_PIq;
-    return (PyObject*)new;
-}
-
-PyObject* QuadPrecision_get_e(PyObject* self, void* closure) {
-    QuadPrecisionObject* new = QuadPrecision_raw_new(BACKEND_SLEEF);
-    if (new == NULL) return NULL;
-    new->value.sleef_value = SLEEF_M_Eq;
-    return (PyObject*)new;
-}
-
-// Add this to the existing QuadPrecision_Type definition
-static PyGetSetDef QuadPrecision_getset[] = {
-    {"pi", (getter)QuadPrecision_get_pi, NULL, "Pi constant", NULL},
-    {"e", (getter)QuadPrecision_get_e, NULL, "Euler's number", NULL},
-    {NULL}  /* Sentinel */
-};
-
 PyTypeObject QuadPrecision_Type = {
         PyVarObject_HEAD_INIT(NULL, 0).tp_name = "numpy_quaddtype.QuadPrecision",
         .tp_basicsize = sizeof(QuadPrecisionObject),
@@ -267,47 +246,10 @@ PyTypeObject QuadPrecision_Type = {
         .tp_str = (reprfunc)QuadPrecision_str_dragon4,
         .tp_as_number = &quad_as_scalar,
         .tp_richcompare = (richcmpfunc)quad_richcompare,
-        .tp_getset = QuadPrecision_getset,
 };
 
-QuadPrecisionObject* initialize_constants(const Sleef_quad value, QuadBackendType backend)
-{
-    QuadPrecisionObject * obj = QuadPrecision_raw_new(backend);
-    if (backend == BACKEND_SLEEF) {
-        obj->value.sleef_value = value;
-    }
-    else {
-        obj->value.longdouble_value = Sleef_cast_to_doubleq1(value);
-    }
-
-    return obj;
-}
-
 int
 init_quadprecision_scalar(void)
 {
-    QuadPrecisionObject* QuadPrecision_pi = initialize_constants(SLEEF_M_PIq, BACKEND_SLEEF);
-    QuadPrecisionObject* QuadPrecision_e = initialize_constants(SLEEF_M_Eq, BACKEND_SLEEF);
-    QuadPrecisionObject* QuadPrecision_log2e = initialize_constants(SLEEF_M_LOG2Eq, BACKEND_SLEEF);
-    QuadPrecisionObject* QuadPrecision_log10e = initialize_constants(SLEEF_M_LOG10Eq, BACKEND_SLEEF);
-    QuadPrecisionObject* QuadPrecision_ln2 = initialize_constants(SLEEF_M_LN2q, BACKEND_SLEEF);
-    QuadPrecisionObject* QuadPrecision_ln10 = initialize_constants(SLEEF_M_LN10q, BACKEND_SLEEF);
-    QuadPrecisionObject* QuadPrecision_sqrt2 = initialize_constants(SLEEF_M_SQRT2q, BACKEND_SLEEF);
-    QuadPrecisionObject* QuadPrecision_sqrt3 = initialize_constants(SLEEF_M_SQRT3q, BACKEND_SLEEF);
-    QuadPrecisionObject* QuadPrecision_egamma = initialize_constants(SLEEF_M_EGAMMAq, BACKEND_SLEEF);
-    QuadPrecisionObject* QuadPrecision_phi = initialize_constants(SLEEF_M_PHIq, BACKEND_SLEEF);
-    QuadPrecisionObject* QuadPrecision_quad_max = initialize_constants(SLEEF_QUAD_MAX, BACKEND_SLEEF);
-    QuadPrecisionObject* QuadPrecision_quad_min = initialize_constants(SLEEF_QUAD_MIN, BACKEND_SLEEF);
-    QuadPrecisionObject* QuadPrecision_quad_epsilon = initialize_constants(SLEEF_QUAD_EPSILON, BACKEND_SLEEF);
-    QuadPrecisionObject* QuadPrecision_quad_denorm_min = initialize_constants(SLEEF_QUAD_DENORM_MIN, BACKEND_SLEEF);
-
-    if (!QuadPrecision_pi || !QuadPrecision_e || !QuadPrecision_log2e || !QuadPrecision_log10e || 
-        !QuadPrecision_ln2 || !QuadPrecision_ln10|| !QuadPrecision_sqrt2 || !QuadPrecision_sqrt3 || 
-        !QuadPrecision_egamma || !QuadPrecision_phi || !QuadPrecision_quad_max || !QuadPrecision_quad_min ||
-        !QuadPrecision_quad_epsilon || !QuadPrecision_quad_denorm_min) {
-        PyErr_SetString(PyExc_RuntimeError, "Failed to initialize QuadPrecision constants");
-        return -1;
-    }
-
     return PyType_Ready(&QuadPrecision_Type);
 }
\ No newline at end of file
diff --git a/quaddtype/numpy_quaddtype/src/scalar.h b/quaddtype/numpy_quaddtype/src/scalar.h
index 61dea172..2faf85c9 100644
--- a/quaddtype/numpy_quaddtype/src/scalar.h
+++ b/quaddtype/numpy_quaddtype/src/scalar.h
@@ -37,24 +37,6 @@ PyObject* QuadPrecision_get_e(PyObject* self, void* closure);
 #define PyArray_IsScalar(obj, QuadPrecDType) PyObject_TypeCheck(obj, &QuadPrecision_Type)
 #define PyArrayScalar_VAL(obj, QuadPrecDType) (((QuadPrecisionObject *)obj)->value)
 
-QuadPrecisionObject* initialize_constants(const Sleef_quad value, QuadBackendType backend);
-
-// constant objects
-extern QuadPrecisionObject *QuadPrecision_pi;
-extern QuadPrecisionObject *QuadPrecision_e;
-extern QuadPrecisionObject *QuadPrecision_log2e;
-extern QuadPrecisionObject *QuadPrecision_log10e;
-extern QuadPrecisionObject *QuadPrecision_ln2;
-extern QuadPrecisionObject *QuadPrecision_ln10;
-extern QuadPrecisionObject *QuadPrecision_sqrt2;
-extern QuadPrecisionObject *QuadPrecision_sqrt3;
-extern QuadPrecisionObject *QuadPrecision_egamma;
-extern QuadPrecisionObject *QuadPrecision_phi;
-extern QuadPrecisionObject *QuadPrecision_quad_max;
-extern QuadPrecisionObject *QuadPrecision_quad_min;
-extern QuadPrecisionObject *QuadPrecision_quad_epsilon;
-extern QuadPrecisionObject *QuadPrecision_quad_denorm_min;
-
 #ifdef __cplusplus
 }
 #endif

From 2fd9cad004d6bf48749c016555469c5ff4c3b53c Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Fri, 20 Sep 2024 21:00:45 +0530
Subject: [PATCH 27/32] refactoring

---
 quaddtype/numpy_quaddtype/src/scalar.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/src/scalar.h b/quaddtype/numpy_quaddtype/src/scalar.h
index 2faf85c9..4fac1adf 100644
--- a/quaddtype/numpy_quaddtype/src/scalar.h
+++ b/quaddtype/numpy_quaddtype/src/scalar.h
@@ -31,9 +31,6 @@ QuadPrecision_from_object(PyObject *value, QuadBackendType backend);
 int
 init_quadprecision_scalar(void);
 
-PyObject* QuadPrecision_get_pi(PyObject* self, void* closure);
-PyObject* QuadPrecision_get_e(PyObject* self, void* closure);
-
 #define PyArray_IsScalar(obj, QuadPrecDType) PyObject_TypeCheck(obj, &QuadPrecision_Type)
 #define PyArrayScalar_VAL(obj, QuadPrecDType) (((QuadPrecisionObject *)obj)->value)
 

From 672be17ab0c9d84f37dac190eada407cdd687525 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Sat, 21 Sep 2024 00:01:39 +0530
Subject: [PATCH 28/32] added aligned and unaligned casting loops

---
 quaddtype/numpy_quaddtype/src/casts.cpp | 137 ++++++++++++++++++++----
 quaddtype/numpy_quaddtype/src/scalar.c  |   1 -
 2 files changed, 119 insertions(+), 19 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp
index aa38e476..ed2f4477 100644
--- a/quaddtype/numpy_quaddtype/src/casts.cpp
+++ b/quaddtype/numpy_quaddtype/src/casts.cpp
@@ -49,9 +49,9 @@ quad_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self),
 }
 
 static int
-quad_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[],
-                          npy_intp const dimensions[], npy_intp const strides[],
-                          void *NPY_UNUSED(auxdata))
+quad_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
+                                    npy_intp const dimensions[], npy_intp const strides[],
+                                    void *NPY_UNUSED(auxdata))
 {
     npy_intp N = dimensions[0];
     char *in_ptr = data[0];
@@ -79,6 +79,44 @@ quad_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[],
     return 0;
 }
 
+static int
+quad_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
+                                  npy_intp const dimensions[], npy_intp const strides[],
+                                  void *NPY_UNUSED(auxdata))
+{
+    npy_intp N = dimensions[0];
+    char *in_ptr = data[0];
+    char *out_ptr = data[1];
+    npy_intp in_stride = strides[0];
+    npy_intp out_stride = strides[1];
+
+    QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)context->descriptors[0];
+    QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1];
+
+    if (descr_in->backend != descr_out->backend) {
+        PyErr_SetString(PyExc_TypeError,
+                        "Cannot convert between different quad-precision backends");
+        return -1;
+    }
+
+    if (descr_in->backend == BACKEND_SLEEF) {
+        while (N--) {
+            *(Sleef_quad *)out_ptr = *(Sleef_quad *)in_ptr;
+            in_ptr += in_stride;
+            out_ptr += out_stride;
+        }
+    }
+    else {
+        while (N--) {
+            *(long double *)out_ptr = *(long double *)in_ptr;
+            in_ptr += in_stride;
+            out_ptr += out_stride;
+        }
+    }
+
+    return 0;
+}
+
 // Casting from other types to QuadDType
 
 template <typename T>
@@ -290,9 +328,9 @@ numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta
 
 template <typename T>
 static int
-numpy_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[],
-                           npy_intp const dimensions[], npy_intp const strides[],
-                           void *NPY_UNUSED(auxdata))
+numpy_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
+                                     npy_intp const dimensions[], npy_intp const strides[],
+                                     void *NPY_UNUSED(auxdata))
 {
     npy_intp N = dimensions[0];
     char *in_ptr = data[0];
@@ -316,6 +354,36 @@ numpy_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[],
     return 0;
 }
 
+template <typename T>
+static int
+numpy_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
+                                   npy_intp const dimensions[], npy_intp const strides[],
+                                   void *NPY_UNUSED(auxdata))
+{
+    npy_intp N = dimensions[0];
+    char *in_ptr = data[0];
+    char *out_ptr = data[1];
+
+    QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1];
+    QuadBackendType backend = descr_out->backend;
+
+    while (N--) {
+        T in_val = *(T *)in_ptr;
+        quad_value out_val = to_quad<T>(in_val, backend);
+
+        if (backend == BACKEND_SLEEF) {
+            *(Sleef_quad *)(out_ptr) = out_val.sleef_value;
+        }
+        else {
+            *(long double *)(out_ptr) = out_val.longdouble_value;
+        }
+
+        in_ptr += strides[0];
+        out_ptr += strides[1];
+    }
+    return 0;
+}
+
 // Casting from QuadDType to other types
 
 template <typename T>
@@ -493,9 +561,9 @@ quad_to_numpy_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta
 
 template <typename T>
 static int
-quad_to_numpy_strided_loop(PyArrayMethod_Context *context, char *const data[],
-                           npy_intp const dimensions[], npy_intp const strides[],
-                           void *NPY_UNUSED(auxdata))
+quad_to_numpy_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
+                                     npy_intp const dimensions[], npy_intp const strides[],
+                                     void *NPY_UNUSED(auxdata))
 {
     npy_intp N = dimensions[0];
     char *in_ptr = data[0];
@@ -519,6 +587,37 @@ quad_to_numpy_strided_loop(PyArrayMethod_Context *context, char *const data[],
     return 0;
 }
 
+template <typename T>
+static int
+quad_to_numpy_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
+                                   npy_intp const dimensions[], npy_intp const strides[],
+                                   void *NPY_UNUSED(auxdata))
+{
+    npy_intp N = dimensions[0];
+    char *in_ptr = data[0];
+    char *out_ptr = data[1];
+
+    QuadPrecDTypeObject *quad_descr = (QuadPrecDTypeObject *)context->descriptors[0];
+    QuadBackendType backend = quad_descr->backend;
+
+    while (N--) {
+        quad_value in_val;
+        if (backend == BACKEND_SLEEF) {
+            in_val.sleef_value = *(Sleef_quad *)in_ptr;
+        }
+        else {
+            in_val.longdouble_value = *(long double *)in_ptr;
+        }
+
+        T out_val = from_quad<T>(in_val, backend);
+        *(T *)(out_ptr) = out_val;
+
+        in_ptr += strides[0];
+        out_ptr += strides[1];
+    }
+    return 0;
+}
+
 static PyArrayMethod_Spec *specs[NUM_CASTS + 1];  // +1 for NULL terminator
 static size_t spec_count = 0;
 
@@ -537,9 +636,10 @@ add_cast_from(PyArray_DTypeMeta *to)
 {
     PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{&QuadPrecDType, to};
 
-    PyType_Slot *slots = new PyType_Slot[3]{
+    PyType_Slot *slots = new PyType_Slot[]{
             {NPY_METH_resolve_descriptors, (void *)&quad_to_numpy_resolve_descriptors<T>},
-            {NPY_METH_strided_loop, (void *)&quad_to_numpy_strided_loop<T>},
+            {NPY_METH_strided_loop, (void *)&quad_to_numpy_strided_loop_aligned<T>},
+            {NPY_METH_unaligned_strided_loop, (void *)&quad_to_numpy_strided_loop_unaligned<T>},
             {0, nullptr}};
 
     PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{
@@ -547,7 +647,7 @@ add_cast_from(PyArray_DTypeMeta *to)
             .nin = 1,
             .nout = 1,
             .casting = NPY_UNSAFE_CASTING,
-            .flags = (NPY_ARRAYMETHOD_FLAGS)0,
+            .flags = NPY_METH_SUPPORTS_UNALIGNED,
             .dtypes = dtypes,
             .slots = slots,
     };
@@ -560,9 +660,10 @@ add_cast_to(PyArray_DTypeMeta *from)
 {
     PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{from, &QuadPrecDType};
 
-    PyType_Slot *slots = new PyType_Slot[3]{
+    PyType_Slot *slots = new PyType_Slot[]{
             {NPY_METH_resolve_descriptors, (void *)&numpy_to_quad_resolve_descriptors<T>},
-            {NPY_METH_strided_loop, (void *)&numpy_to_quad_strided_loop<T>},
+            {NPY_METH_strided_loop, (void *)&numpy_to_quad_strided_loop_aligned<T>},
+            {NPY_METH_unaligned_strided_loop, (void *)&numpy_to_quad_strided_loop_unaligned<T>},
             {0, nullptr}};
 
     PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{
@@ -570,7 +671,7 @@ add_cast_to(PyArray_DTypeMeta *from)
             .nin = 1,
             .nout = 1,
             .casting = NPY_SAFE_CASTING,
-            .flags = (NPY_ARRAYMETHOD_FLAGS)0,
+            .flags = NPY_METH_SUPPORTS_UNALIGNED,
             .dtypes = dtypes,
             .slots = slots,
     };
@@ -583,10 +684,10 @@ init_casts_internal(void)
 {
     PyArray_DTypeMeta **quad2quad_dtypes =
             new PyArray_DTypeMeta *[2]{&QuadPrecDType, &QuadPrecDType};
-    PyType_Slot *quad2quad_slots = new PyType_Slot[4]{
+    PyType_Slot *quad2quad_slots = new PyType_Slot[]{
             {NPY_METH_resolve_descriptors, (void *)&quad_to_quad_resolve_descriptors},
-            {NPY_METH_strided_loop, (void *)&quad_to_quad_strided_loop},
-            {NPY_METH_unaligned_strided_loop, (void *)&quad_to_quad_strided_loop},
+            {NPY_METH_strided_loop, (void *)&quad_to_quad_strided_loop_aligned},
+            {NPY_METH_unaligned_strided_loop, (void *)&quad_to_quad_strided_loop_unaligned},
             {0, nullptr}};
 
     PyArrayMethod_Spec *quad2quad_spec = new PyArrayMethod_Spec{
diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c
index c65e0889..9bec08a6 100644
--- a/quaddtype/numpy_quaddtype/src/scalar.c
+++ b/quaddtype/numpy_quaddtype/src/scalar.c
@@ -79,7 +79,6 @@ QuadPrecision_from_object(PyObject *value, QuadBackendType backend)
     else if (Py_TYPE(value) == &QuadPrecision_Type) {
         Py_DECREF(self);  // discard the default one
         QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)value;
-
         // create a new one with the same backend
         QuadPrecisionObject *self = QuadPrecision_raw_new(quad_obj->backend);
         if (quad_obj->backend == BACKEND_SLEEF) {

From 600b2c8d96836ec3719fe9242ae9171f5067b182 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Sat, 21 Sep 2024 03:06:36 +0530
Subject: [PATCH 29/32] fixed inter-backend cast segment fault

---
 quaddtype/numpy_quaddtype/src/casts.cpp      | 77 +++++++++++++++-----
 quaddtype/numpy_quaddtype/src/scalar_ops.cpp |  8 +-
 quaddtype/numpy_quaddtype/src/umath.cpp      | 38 ++++------
 3 files changed, 77 insertions(+), 46 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp
index ed2f4477..c662b4d5 100644
--- a/quaddtype/numpy_quaddtype/src/casts.cpp
+++ b/quaddtype/numpy_quaddtype/src/casts.cpp
@@ -28,9 +28,10 @@ quad_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self),
                                  QuadPrecDTypeObject *given_descrs[2],
                                  QuadPrecDTypeObject *loop_descrs[2], npy_intp *view_offset)
 {
-    if (given_descrs[0]->backend != given_descrs[1]->backend) {
-        return NPY_UNSAFE_CASTING;
-    }
+    NPY_CASTING casting = NPY_NO_CASTING;
+
+    if (given_descrs[0]->backend != given_descrs[1]->backend)
+        casting = NPY_UNSAFE_CASTING;
 
     Py_INCREF(given_descrs[0]);
     loop_descrs[0] = given_descrs[0];
@@ -45,7 +46,7 @@ quad_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self),
     }
 
     *view_offset = 0;
-    return NPY_NO_CASTING;
+    return casting;
 }
 
 static int
@@ -62,10 +63,26 @@ quad_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *const
     QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)context->descriptors[0];
     QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1];
 
+    // inter-backend casting
     if (descr_in->backend != descr_out->backend) {
-        PyErr_SetString(PyExc_TypeError,
-                        "Cannot convert between different quad-precision backends");
-        return -1;
+        while (N--) {
+            quad_value in_val, out_val;
+            if (descr_in->backend == BACKEND_SLEEF) {
+                memcpy(&in_val.sleef_value, in_ptr, sizeof(Sleef_quad));
+                out_val.longdouble_value = Sleef_cast_to_doubleq1(in_val.sleef_value);
+            }
+            else {
+                memcpy(&in_val.longdouble_value, in_ptr, sizeof(long double));
+                out_val.sleef_value = Sleef_cast_from_doubleq1(in_val.longdouble_value);
+            }
+            memcpy(out_ptr, &out_val,
+                   (descr_out->backend == BACKEND_SLEEF) ? sizeof(Sleef_quad)
+                                                         : sizeof(long double));
+            in_ptr += in_stride;
+            out_ptr += out_stride;
+        }
+
+        return 0;
     }
 
     size_t elem_size =
@@ -93,10 +110,26 @@ quad_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const da
     QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)context->descriptors[0];
     QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1];
 
+    // inter-backend casting
     if (descr_in->backend != descr_out->backend) {
-        PyErr_SetString(PyExc_TypeError,
-                        "Cannot convert between different quad-precision backends");
-        return -1;
+        if (descr_in->backend == BACKEND_SLEEF) {
+            while (N--) {
+                Sleef_quad in_val = *(Sleef_quad *)in_ptr;
+                *(long double *)out_ptr = Sleef_cast_to_doubleq1(in_val);
+                in_ptr += in_stride;
+                out_ptr += out_stride;
+            }
+        }
+        else {
+            while (N--) {
+                long double in_val = *(long double *)in_ptr;
+                *(Sleef_quad *)out_ptr = Sleef_cast_from_doubleq1(in_val);
+                in_ptr += in_stride;
+                out_ptr += out_stride;
+            }
+        }
+
+        return 0;
     }
 
     if (descr_in->backend == BACKEND_SLEEF) {
@@ -627,6 +660,11 @@ add_spec(PyArrayMethod_Spec *spec)
     if (spec_count < NUM_CASTS) {
         specs[spec_count++] = spec;
     }
+    else {
+        delete[] spec->dtypes;
+        delete[] spec->slots;
+        delete spec;
+    }
 }
 
 // functions to add casts
@@ -682,9 +720,8 @@ add_cast_to(PyArray_DTypeMeta *from)
 PyArrayMethod_Spec **
 init_casts_internal(void)
 {
-    PyArray_DTypeMeta **quad2quad_dtypes =
-            new PyArray_DTypeMeta *[2]{&QuadPrecDType, &QuadPrecDType};
-    PyType_Slot *quad2quad_slots = new PyType_Slot[]{
+    PyArray_DTypeMeta **quad2quad_dtypes = new PyArray_DTypeMeta *[2]{nullptr, nullptr};
+    PyType_Slot *quad2quad_slots = new PyType_Slot[4]{
             {NPY_METH_resolve_descriptors, (void *)&quad_to_quad_resolve_descriptors},
             {NPY_METH_strided_loop, (void *)&quad_to_quad_strided_loop_aligned},
             {NPY_METH_unaligned_strided_loop, (void *)&quad_to_quad_strided_loop_unaligned},
@@ -694,7 +731,7 @@ init_casts_internal(void)
             .name = "cast_QuadPrec_to_QuadPrec",
             .nin = 1,
             .nout = 1,
-            .casting = NPY_NO_CASTING,
+            .casting = NPY_UNSAFE_CASTING,  // since SLEEF -> ld might lose precision
             .flags = NPY_METH_SUPPORTS_UNALIGNED,
             .dtypes = quad2quad_dtypes,
             .slots = quad2quad_slots,
@@ -749,13 +786,13 @@ init_casts(void)
 void
 free_casts(void)
 {
-    for (auto cast : specs) {
-        if (cast == nullptr) {
-            continue;
+    for (size_t i = 0; i < spec_count; i++) {
+        if (specs[i]) {
+            delete[] specs[i]->dtypes;
+            delete[] specs[i]->slots;
+            delete specs[i];
+            specs[i] = nullptr;
         }
-        delete[] cast->dtypes;
-        delete[] cast->slots;
-        delete cast;
     }
     spec_count = 0;
 }
\ No newline at end of file
diff --git a/quaddtype/numpy_quaddtype/src/scalar_ops.cpp b/quaddtype/numpy_quaddtype/src/scalar_ops.cpp
index 2faf810d..98261fe9 100644
--- a/quaddtype/numpy_quaddtype/src/scalar_ops.cpp
+++ b/quaddtype/numpy_quaddtype/src/scalar_ops.cpp
@@ -124,7 +124,6 @@ quad_binary_func(PyObject *op1, PyObject *op2)
     return (PyObject *)res;
 }
 
-// todo: add support with float and int
 PyObject *
 quad_richcompare(QuadPrecisionObject *self, PyObject *other, int cmp_op)
 {
@@ -212,7 +211,8 @@ QuadPrecision_float(QuadPrecisionObject *self)
 {
     if (self->backend == BACKEND_SLEEF) {
         return PyFloat_FromDouble(Sleef_cast_to_doubleq1(self->value.sleef_value));
-    } else {
+    }
+    else {
         return PyFloat_FromDouble((double)self->value.longdouble_value);
     }
 }
@@ -222,12 +222,12 @@ QuadPrecision_int(QuadPrecisionObject *self)
 {
     if (self->backend == BACKEND_SLEEF) {
         return PyLong_FromLongLong(Sleef_cast_to_int64q1(self->value.sleef_value));
-    } else {
+    }
+    else {
         return PyLong_FromLongLong((long long)self->value.longdouble_value);
     }
 }
 
-
 PyNumberMethods quad_as_scalar = {
         .nb_add = (binaryfunc)quad_binary_func<quad_add, ld_add>,
         .nb_subtract = (binaryfunc)quad_binary_func<quad_sub, ld_sub>,
diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp
index 7271443e..f5665451 100644
--- a/quaddtype/numpy_quaddtype/src/umath.cpp
+++ b/quaddtype/numpy_quaddtype/src/umath.cpp
@@ -282,7 +282,6 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
     // Determine target backend and if casting is needed
     NPY_CASTING casting = NPY_NO_CASTING;
     if (descr_in1->backend != descr_in2->backend) {
-        
         target_backend = BACKEND_LONGDOUBLE;
         casting = NPY_SAFE_CASTING;
     }
@@ -398,12 +397,11 @@ static int
 quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
                     PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[])
 {
-    
     int nin = ufunc->nin;
     int nargs = ufunc->nargs;
     PyArray_DTypeMeta *common = NULL;
     bool has_quad = false;
-    
+
     // Handle the special case for reductions
     if (op_dtypes[0] == NULL) {
         assert(nin == 2 && ufunc->nout == 1); /* must be reduction */
@@ -417,7 +415,6 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
     // Check if any input or signature is QuadPrecision
     for (int i = 0; i < nin; i++) {
         if (op_dtypes[i] == &QuadPrecDType) {
-            
             has_quad = true;
         }
     }
@@ -461,7 +458,7 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
         else {
             // Otherwise, use the common dtype
             Py_INCREF(common);
-            
+
             new_op_dtypes[i] = common;
         }
     }
@@ -563,13 +560,14 @@ init_quad_binary_ops(PyObject *numpy)
 
 static NPY_CASTING
 quad_comparison_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[],
-                                   PyArray_Descr *const given_descrs[],
-                                   PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED(view_offset))
+                                       PyArray_Descr *const given_descrs[],
+                                       PyArray_Descr *loop_descrs[],
+                                       npy_intp *NPY_UNUSED(view_offset))
 {
     QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0];
     QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1];
     QuadBackendType target_backend;
-    
+
     // As dealing with different backends then cast to boolean
     NPY_CASTING casting = NPY_NO_CASTING;
     if (descr_in1->backend != descr_in2->backend) {
@@ -599,7 +597,7 @@ quad_comparison_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const
     if (!loop_descrs[2]) {
         return (NPY_CASTING)-1;
     }
-        return casting;
+    return casting;
 }
 
 template <cmp_quad_def sleef_comp, cmp_londouble_def ld_comp>
@@ -626,10 +624,9 @@ quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[
         npy_bool result;
 
         if (backend == BACKEND_SLEEF) {
-           result = sleef_comp(&in1.sleef_value, &in2.sleef_value);
+            result = sleef_comp(&in1.sleef_value, &in2.sleef_value);
         }
         else {
-            
             result = ld_comp(&in1.longdouble_value, &in2.longdouble_value);
         }
 
@@ -642,12 +639,11 @@ quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[
     return 0;
 }
 
-
 template <cmp_quad_def sleef_comp, cmp_londouble_def ld_comp>
 int
 quad_generic_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
-                               npy_intp const dimensions[], npy_intp const strides[],
-                               NpyAuxData *auxdata)
+                                       npy_intp const dimensions[], npy_intp const strides[],
+                                       NpyAuxData *auxdata)
 {
     npy_intp N = dimensions[0];
     char *in1_ptr = data[0], *in2_ptr = data[1];
@@ -658,19 +654,16 @@ quad_generic_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *con
 
     QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];
     QuadBackendType backend = descr->backend;
-    while (N--) 
-    {
+    while (N--) {
         quad_value in1 = *(quad_value *)in1_ptr;
         quad_value in2 = *(quad_value *)in2_ptr;
 
         npy_bool result;
 
-        if (backend == BACKEND_SLEEF) 
-        {
+        if (backend == BACKEND_SLEEF) {
             result = sleef_comp(&in1.sleef_value, &in2.sleef_value);
-        } 
-        else 
-        {
+        }
+        else {
             result = ld_comp(&in1.longdouble_value, &in2.longdouble_value);
         }
 
@@ -711,7 +704,8 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name)
 
     PyType_Slot slots[] = {
             {NPY_METH_resolve_descriptors, (void *)&quad_comparison_op_resolve_descriptors},
-            {NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop_aligned<sleef_comp, ld_comp>},
+            {NPY_METH_strided_loop,
+             (void *)&quad_generic_comp_strided_loop_aligned<sleef_comp, ld_comp>},
             {NPY_METH_unaligned_strided_loop,
              (void *)&quad_generic_comp_strided_loop<sleef_comp, ld_comp>},
             {0, NULL}};

From 77e25b6aeaafbe8a4354466c1e384c46cfe37777 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Sun, 22 Sep 2024 00:53:57 +0530
Subject: [PATCH 30/32] made ufunc return actual value instead of 0

---
 quaddtype/numpy_quaddtype/src/ops.hpp        | 428 ++++++++-----------
 quaddtype/numpy_quaddtype/src/scalar_ops.cpp |  20 +-
 quaddtype/numpy_quaddtype/src/umath.cpp      |  16 +-
 3 files changed, 202 insertions(+), 262 deletions(-)

diff --git a/quaddtype/numpy_quaddtype/src/ops.hpp b/quaddtype/numpy_quaddtype/src/ops.hpp
index dcbb902e..b32ae386 100644
--- a/quaddtype/numpy_quaddtype/src/ops.hpp
+++ b/quaddtype/numpy_quaddtype/src/ops.hpp
@@ -3,435 +3,375 @@
 #include <cmath>
 
 // Unary Quad Operations
-typedef int (*unary_op_quad_def)(Sleef_quad *, Sleef_quad *);
+typedef Sleef_quad (*unary_op_quad_def)(Sleef_quad *);
 
-static int
-quad_negative(Sleef_quad *op, Sleef_quad *out)
+static Sleef_quad
+quad_negative(Sleef_quad *op)
 {
-    *out = Sleef_negq1(*op);
-    return 0;
+    return Sleef_negq1(*op);
 }
 
-static int
-quad_positive(Sleef_quad *op, Sleef_quad *out)
+static Sleef_quad
+quad_positive(Sleef_quad *op)
 {
-    *out = *op;
-    return 0;
+    return *op;
 }
 
-static inline int
-quad_absolute(Sleef_quad *op, Sleef_quad *out)
+static inline Sleef_quad
+quad_absolute(Sleef_quad *op)
 {
-    *out = Sleef_fabsq1(*op);
-    return 0;
+    return Sleef_fabsq1(*op);
 }
 
-static inline int
-quad_rint(Sleef_quad *op, Sleef_quad *out)
+static inline Sleef_quad
+quad_rint(Sleef_quad *op)
 {
-    *out = Sleef_rintq1(*op);
-    return 0;
+    return Sleef_rintq1(*op);
 }
 
-static inline int
-quad_trunc(Sleef_quad *op, Sleef_quad *out)
+static inline Sleef_quad
+quad_trunc(Sleef_quad *op)
 {
-    *out = Sleef_truncq1(*op);
-    return 0;
+    return Sleef_truncq1(*op);
 }
 
-static inline int
-quad_floor(Sleef_quad *op, Sleef_quad *out)
+static inline Sleef_quad
+quad_floor(Sleef_quad *op)
 {
-    *out = Sleef_floorq1(*op);
-    return 0;
+    return Sleef_floorq1(*op);
 }
 
-static inline int
-quad_ceil(Sleef_quad *op, Sleef_quad *out)
+static inline Sleef_quad
+quad_ceil(Sleef_quad *op)
 {
-    *out = Sleef_ceilq1(*op);
-    return 0;
+    return Sleef_ceilq1(*op);
 }
 
-static inline int
-quad_sqrt(Sleef_quad *op, Sleef_quad *out)
+static inline Sleef_quad
+quad_sqrt(Sleef_quad *op)
 {
-    *out = Sleef_sqrtq1_u05(*op);
-    return 0;
+    return Sleef_sqrtq1_u05(*op);
 }
 
-static inline int
-quad_square(Sleef_quad *op, Sleef_quad *out)
+static inline Sleef_quad
+quad_square(Sleef_quad *op)
 {
-    *out = Sleef_mulq1_u05(*op, *op);
-    return 0;
+    return Sleef_mulq1_u05(*op, *op);
 }
 
-static inline int
-quad_log(Sleef_quad *op, Sleef_quad *out)
+static inline Sleef_quad
+quad_log(Sleef_quad *op)
 {
-    *out = Sleef_logq1_u10(*op);
-    return 0;
+    return Sleef_logq1_u10(*op);
 }
 
-static inline int
-quad_log2(Sleef_quad *op, Sleef_quad *out)
+static inline Sleef_quad
+quad_log2(Sleef_quad *op)
 {
-    *out = Sleef_log2q1_u10(*op);
-    return 0;
+    return Sleef_log2q1_u10(*op);
 }
 
-static inline int
-quad_log10(Sleef_quad *op, Sleef_quad *out)
+static inline Sleef_quad
+quad_log10(Sleef_quad *op)
 {
-    *out = Sleef_log10q1_u10(*op);
-    return 0;
+    return Sleef_log10q1_u10(*op);
 }
 
-static inline int
-quad_log1p(Sleef_quad *op, Sleef_quad *out)
+static inline Sleef_quad
+quad_log1p(Sleef_quad *op)
 {
-    *out = Sleef_log1pq1_u10(*op);
-    return 0;
+    return Sleef_log1pq1_u10(*op);
 }
 
-static inline int
-quad_exp(Sleef_quad *op, Sleef_quad *out)
+static inline Sleef_quad
+quad_exp(Sleef_quad *op)
 {
-    *out = Sleef_expq1_u10(*op);
-    return 0;
+    return Sleef_expq1_u10(*op);
 }
 
-static inline int
-quad_exp2(Sleef_quad *op, Sleef_quad *out)
+static inline Sleef_quad
+quad_exp2(Sleef_quad *op)
 {
-    *out = Sleef_exp2q1_u10(*op);
-    return 0;
+    return Sleef_exp2q1_u10(*op);
 }
 
-static inline int
-quad_sin(Sleef_quad *op, Sleef_quad *out)
+static inline Sleef_quad
+quad_sin(Sleef_quad *op)
 {
-    *out = Sleef_sinq1_u10(*op);
-    return 0;
+    return Sleef_sinq1_u10(*op);
 }
 
-static inline int
-quad_cos(Sleef_quad *op, Sleef_quad *out)
+static inline Sleef_quad
+quad_cos(Sleef_quad *op)
 {
-    *out = Sleef_cosq1_u10(*op);
-    return 0;
+    return Sleef_cosq1_u10(*op);
 }
 
-static inline int
-quad_tan(Sleef_quad *op, Sleef_quad *out)
+static inline Sleef_quad
+quad_tan(Sleef_quad *op)
 {
-    *out = Sleef_tanq1_u10(*op);
-    return 0;
+    return Sleef_tanq1_u10(*op);
 }
 
-static inline int
-quad_asin(Sleef_quad *op, Sleef_quad *out)
+static inline Sleef_quad
+quad_asin(Sleef_quad *op)
 {
-    *out = Sleef_asinq1_u10(*op);
-    return 0;
+    return Sleef_asinq1_u10(*op);
 }
 
-static inline int
-quad_acos(Sleef_quad *op, Sleef_quad *out)
+static inline Sleef_quad
+quad_acos(Sleef_quad *op)
 {
-    *out = Sleef_acosq1_u10(*op);
-    return 0;
+    return Sleef_acosq1_u10(*op);
 }
 
-static inline int
-quad_atan(Sleef_quad *op, Sleef_quad *out)
+static inline Sleef_quad
+quad_atan(Sleef_quad *op)
 {
-    *out = Sleef_atanq1_u10(*op);
-    return 0;
+    return Sleef_atanq1_u10(*op);
 }
 
 // Unary long double operations
-typedef int (*unary_op_longdouble_def)(long double *, long double *);
+typedef long double (*unary_op_longdouble_def)(long double *);
 
-static int
-ld_negative(long double *op, long double *out)
+static inline long double
+ld_negative(long double *op)
 {
-    *out = -(*op);
-    return 0;
+    return -(*op);
 }
 
-static int
-ld_positive(long double *op, long double *out)
+static inline long double
+ld_positive(long double *op)
 {
-    *out = *op;
-    return 0;
+    return *op;
 }
 
-static inline int
-ld_absolute(long double *op, long double *out)
+static inline long double
+ld_absolute(long double *op)
 {
-    *out = fabsl(*op);
-    return 0;
+    return fabsl(*op);
 }
 
-static inline int
-ld_rint(long double *op, long double *out)
+static inline long double
+ld_rint(long double *op)
 {
-    *out = rintl(*op);
-    return 0;
+    return rintl(*op);
 }
 
-static inline int
-ld_trunc(long double *op, long double *out)
+static inline long double
+ld_trunc(long double *op)
 {
-    *out = truncl(*op);
-    return 0;
+    return truncl(*op);
 }
 
-static inline int
-ld_floor(long double *op, long double *out)
+static inline long double
+ld_floor(long double *op)
 {
-    *out = floorl(*op);
-    return 0;
+    return floorl(*op);
 }
 
-static inline int
-ld_ceil(long double *op, long double *out)
+static inline long double
+ld_ceil(long double *op)
 {
-    *out = ceill(*op);
-    return 0;
+    return ceill(*op);
 }
 
-static inline int
-ld_sqrt(long double *op, long double *out)
+static inline long double
+ld_sqrt(long double *op)
 {
-    *out = sqrtl(*op);
-    return 0;
+    return sqrtl(*op);
 }
 
-static inline int
-ld_square(long double *op, long double *out)
+static inline long double
+ld_square(long double *op)
 {
-    *out = (*op) * (*op);
-    return 0;
+    return (*op) * (*op);
 }
 
-static inline int
-ld_log(long double *op, long double *out)
+static inline long double
+ld_log(long double *op)
 {
-    *out = logl(*op);
-    return 0;
+    return logl(*op);
 }
 
-static inline int
-ld_log2(long double *op, long double *out)
+static inline long double
+ld_log2(long double *op)
 {
-    *out = log2l(*op);
-    return 0;
+    return log2l(*op);
 }
 
-static inline int
-ld_log10(long double *op, long double *out)
+static inline long double
+ld_log10(long double *op)
 {
-    *out = log10l(*op);
-    return 0;
+    return log10l(*op);
 }
 
-static inline int
-ld_log1p(long double *op, long double *out)
+static inline long double
+ld_log1p(long double *op)
 {
-    *out = log1pl(*op);
-    return 0;
+    return log1pl(*op);
 }
 
-static inline int
-ld_exp(long double *op, long double *out)
+static inline long double
+ld_exp(long double *op)
 {
-    *out = expl(*op);
-    return 0;
+    return expl(*op);
 }
 
-static inline int
-ld_exp2(long double *op, long double *out)
+static inline long double
+ld_exp2(long double *op)
 {
-    *out = exp2l(*op);
-    return 0;
+    return exp2l(*op);
 }
 
-static inline int
-ld_sin(long double *op, long double *out)
+static inline long double
+ld_sin(long double *op)
 {
-    *out = sinl(*op);
-    return 0;
+    return sinl(*op);
 }
 
-static inline int
-ld_cos(long double *op, long double *out)
+static inline long double
+ld_cos(long double *op)
 {
-    *out = cosl(*op);
-    return 0;
+    return cosl(*op);
 }
 
-static inline int
-ld_tan(long double *op, long double *out)
+static inline long double
+ld_tan(long double *op)
 {
-    *out = tanl(*op);
-    return 0;
+    return tanl(*op);
 }
 
-static inline int
-ld_asin(long double *op, long double *out)
+static inline long double
+ld_asin(long double *op)
 {
-    *out = asinl(*op);
-    return 0;
+    return asinl(*op);
 }
 
-static inline int
-ld_acos(long double *op, long double *out)
+static inline long double
+ld_acos(long double *op)
 {
-    *out = acosl(*op);
-    return 0;
+    return acosl(*op);
 }
 
-static inline int
-ld_atan(long double *op, long double *out)
+static inline long double
+ld_atan(long double *op)
 {
-    *out = atanl(*op);
-    return 0;
+    return atanl(*op);
 }
 
 // Binary Quad operations
-typedef int (*binary_op_quad_def)(Sleef_quad *, Sleef_quad *, Sleef_quad *);
+typedef Sleef_quad (*binary_op_quad_def)(Sleef_quad *, Sleef_quad *);
 
-static inline int
-quad_add(Sleef_quad *out, Sleef_quad *in1, Sleef_quad *in2)
+static inline Sleef_quad
+quad_add(Sleef_quad *in1, Sleef_quad *in2)
 {
-    *out = Sleef_addq1_u05(*in1, *in2);
-    return 0;
+    return Sleef_addq1_u05(*in1, *in2);
 }
 
-static inline int
-quad_sub(Sleef_quad *out, Sleef_quad *in1, Sleef_quad *in2)
+static inline Sleef_quad
+quad_sub(Sleef_quad *in1, Sleef_quad *in2)
 {
-    *out = Sleef_subq1_u05(*in1, *in2);
-    return 0;
+    return Sleef_subq1_u05(*in1, *in2);
 }
 
-static inline int
-quad_mul(Sleef_quad *res, Sleef_quad *a, Sleef_quad *b)
+static inline Sleef_quad
+quad_mul(Sleef_quad *a, Sleef_quad *b)
 {
-    *res = Sleef_mulq1_u05(*a, *b);
-    return 0;
+    return Sleef_mulq1_u05(*a, *b);
 }
 
-static inline int
-quad_div(Sleef_quad *res, Sleef_quad *a, Sleef_quad *b)
+static inline Sleef_quad
+quad_div(Sleef_quad *a, Sleef_quad *b)
 {
-    *res = Sleef_divq1_u05(*a, *b);
-    return 0;
+    return Sleef_divq1_u05(*a, *b);
 }
 
-static inline int
-quad_pow(Sleef_quad *res, Sleef_quad *a, Sleef_quad *b)
+static inline Sleef_quad
+quad_pow(Sleef_quad *a, Sleef_quad *b)
 {
-    *res = Sleef_powq1_u10(*a, *b);
-    return 0;
+    return Sleef_powq1_u10(*a, *b);
 }
 
-static inline int
-quad_mod(Sleef_quad *res, Sleef_quad *a, Sleef_quad *b)
+static inline Sleef_quad
+quad_mod(Sleef_quad *a, Sleef_quad *b)
 {
-    *res = Sleef_fmodq1(*a, *b);
-    return 0;
+    return Sleef_fmodq1(*a, *b);
 }
 
-static inline int
-quad_minimum(Sleef_quad *out, Sleef_quad *in1, Sleef_quad *in2)
+static inline Sleef_quad
+quad_minimum(Sleef_quad *in1, Sleef_quad *in2)
 {
-    *out = Sleef_icmpleq1(*in1, *in2) ? *in1 : *in2;
-    return 0;
+    return Sleef_icmpleq1(*in1, *in2) ? *in1 : *in2;
 }
 
-static inline int
-quad_maximum(Sleef_quad *out, Sleef_quad *in1, Sleef_quad *in2)
+static inline Sleef_quad
+quad_maximum(Sleef_quad *in1, Sleef_quad *in2)
 {
-    *out = Sleef_icmpgeq1(*in1, *in2) ? *in1 : *in2;
-    return 0;
+    return Sleef_icmpgeq1(*in1, *in2) ? *in1 : *in2;
 }
 
-static inline int
-quad_atan2(Sleef_quad *out, Sleef_quad *in1, Sleef_quad *in2)
+static inline Sleef_quad
+quad_atan2(Sleef_quad *in1, Sleef_quad *in2)
 {
-    *out = Sleef_atan2q1_u10(*in1, *in2);
-    return 0;
+    return Sleef_atan2q1_u10(*in1, *in2);
 }
 
 // Binary long double operations
-typedef int (*binary_op_longdouble_def)(long double *, long double *, long double *);
+typedef long double (*binary_op_longdouble_def)(long double *, long double *);
 
-static inline int
-ld_add(long double *out, long double *in1, long double *in2)
+static inline long double
+ld_add(long double *in1, long double *in2)
 {
-    *out = (*in1) + (*in2);
-    return 0;
+    return (*in1) + (*in2);
 }
 
-static inline int
-ld_sub(long double *out, long double *in1, long double *in2)
+static inline long double
+ld_sub(long double *in1, long double *in2)
 {
-    *out = (*in1) - (*in2);
-    return 0;
+    return (*in1) - (*in2);
 }
 
-static inline int
-ld_mul(long double *res, long double *a, long double *b)
+static inline long double
+ld_mul(long double *a, long double *b)
 {
-    *res = (*a) * (*b);
-    return 0;
+    return (*a) * (*b);
 }
 
-static inline int
-ld_div(long double *res, long double *a, long double *b)
+static inline long double
+ld_div(long double *a, long double *b)
 {
-    *res = (*a) / (*b);
-    return 0;
+    return (*a) / (*b);
 }
 
-static inline int
-ld_pow(long double *res, long double *a, long double *b)
+static inline long double
+ld_pow(long double *a, long double *b)
 {
-    *res = powl(*a, *b);
-    return 0;
+    return powl(*a, *b);
 }
 
-static inline int
-ld_mod(long double *res, long double *a, long double *b)
+static inline long double
+ld_mod(long double *a, long double *b)
 {
-    *res = fmodl(*a, *b);
-    return 0;
+    return fmodl(*a, *b);
 }
 
-static inline int
-ld_minimum(long double *out, long double *in1, long double *in2)
+static inline long double
+ld_minimum(long double *in1, long double *in2)
 {
-    *out = (*in1 < *in2) ? *in1 : *in2;
-    return 0;
+    return (*in1 < *in2) ? *in1 : *in2;
 }
 
-static inline int
-ld_maximum(long double *out, long double *in1, long double *in2)
+static inline long double
+ld_maximum(long double *in1, long double *in2)
 {
-    *out = (*in1 > *in2) ? *in1 : *in2;
-    return 0;
+    return (*in1 > *in2) ? *in1 : *in2;
 }
 
-static inline int
-ld_atan2(long double *out, long double *in1, long double *in2)
+static inline long double
+ld_atan2(long double *in1, long double *in2)
 {
-    *out = atan2l(*in1, *in2);
-    return 0;
+    return atan2l(*in1, *in2);
 }
 
 // comparison quad functions
diff --git a/quaddtype/numpy_quaddtype/src/scalar_ops.cpp b/quaddtype/numpy_quaddtype/src/scalar_ops.cpp
index 98261fe9..5888ad60 100644
--- a/quaddtype/numpy_quaddtype/src/scalar_ops.cpp
+++ b/quaddtype/numpy_quaddtype/src/scalar_ops.cpp
@@ -28,10 +28,10 @@ quad_unary_func(QuadPrecisionObject *self)
     }
 
     if (self->backend == BACKEND_SLEEF) {
-        sleef_op(&self->value.sleef_value, &res->value.sleef_value);
+        res->value.sleef_value = sleef_op(&self->value.sleef_value);
     }
     else {
-        longdouble_op(&self->value.longdouble_value, &res->value.longdouble_value);
+        res->value.longdouble_value = longdouble_op(&self->value.longdouble_value);
     }
     return (PyObject *)res;
 }
@@ -100,22 +100,22 @@ quad_binary_func(PyObject *op1, PyObject *op2)
 
     if (backend == BACKEND_SLEEF) {
         if (is_forward) {
-            sleef_op(&res->value.sleef_value, &self->value.sleef_value,
-                     &other_quad->value.sleef_value);
+            res->value.sleef_value =
+                    sleef_op(&self->value.sleef_value, &other_quad->value.sleef_value);
         }
         else {
-            sleef_op(&res->value.sleef_value, &other_quad->value.sleef_value,
-                     &self->value.sleef_value);
+            res->value.sleef_value =
+                    sleef_op(&other_quad->value.sleef_value, &self->value.sleef_value);
         }
     }
     else {
         if (is_forward) {
-            longdouble_op(&res->value.longdouble_value, &self->value.longdouble_value,
-                          &other_quad->value.longdouble_value);
+            res->value.longdouble_value = longdouble_op(&self->value.longdouble_value,
+                                                        &other_quad->value.longdouble_value);
         }
         else {
-            longdouble_op(&res->value.longdouble_value, &other_quad->value.longdouble_value,
-                          &self->value.longdouble_value);
+            res->value.longdouble_value = longdouble_op(&other_quad->value.longdouble_value,
+                                                        &self->value.longdouble_value);
         }
     }
 
diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp
index f5665451..0058236a 100644
--- a/quaddtype/numpy_quaddtype/src/umath.cpp
+++ b/quaddtype/numpy_quaddtype/src/umath.cpp
@@ -122,10 +122,10 @@ quad_generic_unary_op_strided_loop_unaligned(PyArrayMethod_Context *context, cha
     while (N--) {
         memcpy(&in, in_ptr, elem_size);
         if (backend == BACKEND_SLEEF) {
-            sleef_op(&in.sleef_value, &out.sleef_value);
+            out.sleef_value = sleef_op(&in.sleef_value);
         }
         else {
-            longdouble_op(&in.longdouble_value, &out.longdouble_value);
+            out.longdouble_value = longdouble_op(&in.longdouble_value);
         }
         memcpy(out_ptr, &out, elem_size);
 
@@ -152,10 +152,10 @@ quad_generic_unary_op_strided_loop_aligned(PyArrayMethod_Context *context, char
 
     while (N--) {
         if (backend == BACKEND_SLEEF) {
-            sleef_op((Sleef_quad *)in_ptr, (Sleef_quad *)out_ptr);
+            *(Sleef_quad *)out_ptr = sleef_op((Sleef_quad *)in_ptr);
         }
         else {
-            longdouble_op((long double *)in_ptr, (long double *)out_ptr);
+            *(long double *)out_ptr = longdouble_op((long double *)in_ptr);
         }
         in_ptr += in_stride;
         out_ptr += out_stride;
@@ -348,10 +348,10 @@ quad_generic_binop_strided_loop_unaligned(PyArrayMethod_Context *context, char *
         memcpy(&in1, in1_ptr, elem_size);
         memcpy(&in2, in2_ptr, elem_size);
         if (backend == BACKEND_SLEEF) {
-            sleef_op(&out.sleef_value, &in1.sleef_value, &in2.sleef_value);
+            out.sleef_value = sleef_op(&in1.sleef_value, &in2.sleef_value);
         }
         else {
-            longdouble_op(&out.longdouble_value, &in1.longdouble_value, &in2.longdouble_value);
+            out.longdouble_value = longdouble_op(&in1.longdouble_value, &in2.longdouble_value);
         }
         memcpy(out_ptr, &out, elem_size);
 
@@ -380,10 +380,10 @@ quad_generic_binop_strided_loop_aligned(PyArrayMethod_Context *context, char *co
 
     while (N--) {
         if (backend == BACKEND_SLEEF) {
-            sleef_op((Sleef_quad *)out_ptr, (Sleef_quad *)in1_ptr, (Sleef_quad *)in2_ptr);
+            *(Sleef_quad *)out_ptr = sleef_op((Sleef_quad *)in1_ptr, (Sleef_quad *)in2_ptr);
         }
         else {
-            longdouble_op((long double *)out_ptr, (long double *)in1_ptr, (long double *)in2_ptr);
+            *(long double *)out_ptr = longdouble_op((long double *)in1_ptr, (long double *)in2_ptr);
         }
 
         in1_ptr += in1_stride;

From 87a789184f961c71645ceaec1e0600562d8e9fe3 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Sun, 22 Sep 2024 02:16:24 +0530
Subject: [PATCH 31/32] updated meson.build

---
 quaddtype/meson.build | 1 +
 1 file changed, 1 insertion(+)

diff --git a/quaddtype/meson.build b/quaddtype/meson.build
index fde838c0..9e6b2f8e 100644
--- a/quaddtype/meson.build
+++ b/quaddtype/meson.build
@@ -24,6 +24,7 @@ includes = include_directories(
 )
 
 srcs = [
+    'numpy_quaddtype/src/quad_common.h'
     'numpy_quaddtype/src/casts.h',
     'numpy_quaddtype/src/casts.cpp',
     'numpy_quaddtype/src/scalar.h',

From 1a6618a47decb932d360470018511bb7dd9c10ff Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Sun, 22 Sep 2024 02:19:59 +0530
Subject: [PATCH 32/32] missed the comma :)

---
 quaddtype/meson.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/quaddtype/meson.build b/quaddtype/meson.build
index 9e6b2f8e..d6e651b8 100644
--- a/quaddtype/meson.build
+++ b/quaddtype/meson.build
@@ -24,7 +24,7 @@ includes = include_directories(
 )
 
 srcs = [
-    'numpy_quaddtype/src/quad_common.h'
+    'numpy_quaddtype/src/quad_common.h',
     'numpy_quaddtype/src/casts.h',
     'numpy_quaddtype/src/casts.cpp',
     'numpy_quaddtype/src/scalar.h',