From f082301fd3299eba8efdc975e990f3c548340b95 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 23 Jun 2023 11:57:04 -0700 Subject: [PATCH 1/2] better cython debugging --- .gitignore | 2 +- .../development/debugging_extensions.rst | 27 +++++++++++--- pandas/_libs/meson.build | 6 +++- pandas/_libs/tslibs/meson.build | 5 +++ setup.py | 3 ++ tooling/debug/Dockerfile.pandas-debug | 35 +++++++++++++++++++ tooling/debug/README | 19 ++++++++++ 7 files changed, 90 insertions(+), 7 deletions(-) create mode 100644 tooling/debug/Dockerfile.pandas-debug create mode 100644 tooling/debug/README diff --git a/.gitignore b/.gitignore index cd22c2bb8cb5b..051a3ec11b794 100644 --- a/.gitignore +++ b/.gitignore @@ -39,7 +39,7 @@ .mesonpy-native-file.ini MANIFEST compile_commands.json -debug +.debug # Python files # ################ diff --git a/doc/source/development/debugging_extensions.rst b/doc/source/development/debugging_extensions.rst index 9ac4cf4083475..f490a9fe737af 100644 --- a/doc/source/development/debugging_extensions.rst +++ b/doc/source/development/debugging_extensions.rst @@ -14,14 +14,31 @@ For Python developers with limited or no C/C++ experience this can seem a daunti 2. `Fundamental Python Debugging Part 2 - Python Extensions `_ 3. `Fundamental Python Debugging Part 3 - Cython Extensions `_ -Generating debug builds ------------------------ +Using Docker +------------ -By default building pandas from source will generate a release build. To generate a development build you can type:: +To simplify the debugging process, pandas has created a Docker image with a debug build of Python and the gdb/Cython debuggers pre-installed. You may either ``docker pull pandas/pandas-debug`` to get access to this image or build it from the ``tooling/debug`` folder locallly. - pip install -ve . --no-build-isolation --config-settings=builddir="debug" --config-settings=setup-args="-Dbuildtype=debug" +You can then mount your pandas repository into this image via: -By specifying ``builddir="debug"`` all of the targets will be built and placed in the debug directory relative to the project root. This helps to keep your debug and release artifacts separate; you are of course able to choose a different directory name or omit altogether if you do not care to separate build types. +.. code-block:: sh + + docker run --rm -it -w /data -v ${PWD}:/data pandas/pandas-debug + +Inside the image, you can use meson to build/install pandas and place the build artifacts into a ``debug`` folder using a command as follows: + +.. code-block:: sh + + python -m pip install -ve . --no-build-isolation --config-settings=builddir="debug" --config-settings=setup-args="-Dbuildtype=debug" + +If planning to use cygdb, the files required by that application are placed within the build folder. So you have to first ``cd`` to the build folder, then start that application. + +.. code-block:: sh + + cd debug + cygdb + +Within the debugger you can use `cygdb commands `_ to navigate cython extensions. Editor support -------------- diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index 5e59f15d0d089..d685d073c62fc 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -100,12 +100,16 @@ libs_sources = { 'writers': {'sources': ['writers.pyx']} } +cython_args = ['--include-dir', meson.current_build_dir()] +if get_option('buildtype') == 'debug' + cython_args += ['--gdb'] +endif foreach ext_name, ext_dict : libs_sources py.extension_module( ext_name, ext_dict.get('sources'), - cython_args: ['--include-dir', meson.current_build_dir()], + cython_args: cython_args, include_directories: [inc_np, inc_pd], dependencies: ext_dict.get('deps', ''), subdir: 'pandas/_libs', diff --git a/pandas/_libs/tslibs/meson.build b/pandas/_libs/tslibs/meson.build index 4a51f8dc1e461..144d54434d153 100644 --- a/pandas/_libs/tslibs/meson.build +++ b/pandas/_libs/tslibs/meson.build @@ -19,6 +19,11 @@ tslibs_sources = { 'vectorized': {'sources': ['vectorized.pyx']}, } +cython_args = ['--include-dir', meson.current_build_dir()] +if get_option('buildtype') == 'debug' + cython_args += ['--gdb'] +endif + foreach ext_name, ext_dict : tslibs_sources py.extension_module( ext_name, diff --git a/setup.py b/setup.py index e285299cfc05e..7686264283bd9 100755 --- a/setup.py +++ b/setup.py @@ -422,6 +422,9 @@ def maybe_cythonize(extensions, *args, **kwargs): kwargs["nthreads"] = parsed.parallel build_ext.render_templates(_pxifiles) + if debugging_symbols_requested: + kwargs["gdb_debug"] = True + return cythonize(extensions, *args, **kwargs) diff --git a/tooling/debug/Dockerfile.pandas-debug b/tooling/debug/Dockerfile.pandas-debug new file mode 100644 index 0000000000000..00e10a85d7ab9 --- /dev/null +++ b/tooling/debug/Dockerfile.pandas-debug @@ -0,0 +1,35 @@ +FROM ubuntu:latest + +RUN apt-get update && apt-get upgrade -y +RUN apt-get install -y build-essential git valgrind + +# cpython dev install +RUN git clone -b 3.10 --depth 1 https://github.com/python/cpython.git /clones/cpython +RUN apt-get install -y libbz2-dev libffi-dev libssl-dev zlib1g-dev liblzma-dev libsqlite3-dev libreadline-dev +RUN cd /clones/cpython && ./configure --with-pydebug && CFLAGS="-g3" make -s -j$(nproc) && make install + +# gdb installation +RUN apt-get install -y wget libgmp-dev +RUN cd /tmp && wget http://mirrors.kernel.org/sourceware/gdb/releases/gdb-12.1.tar.gz && tar -zxf gdb-12.1.tar.gz +RUN cd /tmp/gdb-12.1 && ./configure --with-python=python3 && make -j$(nproc) && make install +RUN rm -r /tmp/gdb-12.1 + +# pandas dependencies +RUN python3 -m pip install \ + cython \ + hypothesis \ + ninja \ + numpy \ + meson \ + meson-python \ + pytest \ + pytest-asyncio \ + python-dateutil \ + pytz \ + versioneer[toml] + +# At the time this docker image was built, there was a bug/limitation +# with meson where only having a python3 executable and not python +# would cause the build to fail. This symlink could be removed if +# users stick to always calling python3 within the container +RUN ln -s /usr/local/bin/python3 /usr/local/bin/python diff --git a/tooling/debug/README b/tooling/debug/README new file mode 100644 index 0000000000000..111a958ff5ef5 --- /dev/null +++ b/tooling/debug/README @@ -0,0 +1,19 @@ +The Docker image here helps to set up an isolated environment containing a debug version of Python and a gdb installation which the Cython debugger can work with. + +If you have internet access, you can pull a pre-built image via + +```sh +docker pull pandas/pandas-debug +``` + +To build the image locally, you can do + +```sh +docker build . -t pandas/pandas-debug -f Dockerfile.pandas-debug +``` + +For pandas developers, you can push a new copy of the image to dockerhub via + +```sh +docker push pandas/pandas-debug +``` From f51720fabcd708369d18e3879ab1ffbea50256f5 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 23 Jun 2023 13:49:20 -0700 Subject: [PATCH 2/2] Missing variable use --- pandas/_libs/tslibs/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/meson.build b/pandas/_libs/tslibs/meson.build index 144d54434d153..e8a067ce13a86 100644 --- a/pandas/_libs/tslibs/meson.build +++ b/pandas/_libs/tslibs/meson.build @@ -28,7 +28,7 @@ foreach ext_name, ext_dict : tslibs_sources py.extension_module( ext_name, ext_dict.get('sources'), - cython_args: ['--include-dir', meson.current_build_dir()], + cython_args: cython_args, include_directories: [inc_np, inc_pd], dependencies: ext_dict.get('deps', ''), subdir: 'pandas/_libs/tslibs',