Commit 8d782fb

Merge branch 'main' into raise-on-parse-int-overflow

2 parents: 8b406ab + c2fade1

239 files changed: +3721, -1485 lines

.github/workflows/docbuild-and-upload.yml
Lines changed: 1 addition & 1 deletion

@@ -81,7 +81,7 @@ jobs:
         if: github.event_name == 'push' && github.ref == 'refs/heads/main'

       - name: Upload prod docs
-        run: rsync -az --delete doc/build/html/ docs@${{ secrets.server_ip }}:/usr/share/nginx/pandas/pandas-docs/version/${GITHUB_REF_NAME}
+        run: rsync -az --delete doc/build/html/ docs@${{ secrets.server_ip }}:/usr/share/nginx/pandas/pandas-docs/version/${GITHUB_REF_NAME:1}
         if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')

       - name: Move docs into site directory
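For context, ${GITHUB_REF_NAME:1} is bash substring expansion that drops the first character of the ref name; on a tag push this presumably maps a tag such as v1.5.1 (an assumed example, not taken from this commit) onto a 1.5.1 docs directory rather than v1.5.1. A minimal Python sketch of that same path computation, assuming that example tag:

    # Sketch only: mirrors the bash expansion ${GITHUB_REF_NAME:1} with Python slicing.
    ref_name = "v1.5.1"          # assumed example tag name, not from this commit
    dest_version = ref_name[1:]  # "1.5.1" -- the leading "v" is stripped
    print(f"/usr/share/nginx/pandas/pandas-docs/version/{dest_version}")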

.github/workflows/scorecards.yml
Lines changed: 54 additions & 0 deletions

@@ -0,0 +1,54 @@
+name: Scorecards supply-chain security
+on:
+  # Only the default branch is supported.
+  branch_protection_rule:
+  schedule:
+    - cron: '27 19 * * 4'
+  push:
+    branches: [ "main" ]
+
+# Declare default permissions as read only.
+permissions: read-all
+
+jobs:
+  analysis:
+    name: Scorecards analysis
+    runs-on: ubuntu-latest
+    permissions:
+      # Needed to upload the results to code-scanning dashboard.
+      security-events: write
+      # Used to receive a badge.
+      id-token: write
+
+    if: github.repository == 'pandas-dev/pandas' # don't run on forks
+
+    steps:
+      - name: "Checkout code"
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+
+      - name: "Run analysis"
+        uses: ossf/scorecard-action@v2.0.3
+        with:
+          results_file: results.sarif
+          results_format: sarif
+
+          # Publish the results for public repositories to enable scorecard badges. For more details, see
+          # https://github.com/ossf/scorecard-action#publishing-results.
+          publish_results: true
+
+      # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
+      # format to the repository Actions tab.
+      - name: "Upload artifact"
+        uses: actions/upload-artifact@v3
+        with:
+          name: SARIF file
+          path: results.sarif
+          retention-days: 5
+
+      # Upload the results to GitHub's code scanning dashboard.
+      - name: "Upload to code-scanning"
+        uses: github/codeql-action/upload-sarif@v1
+        with:
+          sarif_file: results.sarif

.github/workflows/wheels.yml
Lines changed: 180 additions & 0 deletions

@@ -0,0 +1,180 @@
+# Workflow to build wheels for upload to PyPI.
+# Inspired by numpy's cibuildwheel config https://github.com/numpy/numpy/blob/main/.github/workflows/wheels.yml
+#
+# In an attempt to save CI resources, wheel builds do
+# not run on each push but only weekly and for releases.
+# Wheel builds can be triggered from the Actions page
+# (if you have the perms) on a commit to master.
+#
+# Alternatively, you can add labels to the pull request in order to trigger wheel
+# builds.
+# The label(s) that trigger builds are:
+# - Build
+name: Wheel builder
+
+on:
+  schedule:
+  # ┌───────────── minute (0 - 59)
+  # │ ┌───────────── hour (0 - 23)
+  # │ │ ┌───────────── day of the month (1 - 31)
+  # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
+  # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
+  # │ │ │ │ │
+  - cron: "27 3 */1 * *"
+  push:
+  pull_request:
+    types: [labeled, opened, synchronize, reopened]
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  build_wheels:
+    name: Build wheel for ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
+    if: >-
+      github.event_name == 'schedule' ||
+      github.event_name == 'workflow_dispatch' ||
+      (github.event_name == 'pull_request' &&
+      contains(github.event.pull_request.labels.*.name, 'Build')) ||
+      (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && ( ! endsWith(github.ref, 'dev0')))
+    runs-on: ${{ matrix.buildplat[0] }}
+    strategy:
+      # Ensure that a wheel builder finishes even if another fails
+      fail-fast: false
+      matrix:
+        # GitHub Actions doesn't support pairing matrix values together, let's improvise
+        # https://github.com/github/feedback/discussions/7835#discussioncomment-1769026
+        buildplat:
+        - [ubuntu-20.04, manylinux_x86_64]
+        - [macos-11, macosx_*]
+        - [windows-2019, win_amd64]
+        - [windows-2019, win32]
+        # TODO: support PyPy?
+        python: [["cp38", "3.8"], ["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11-dev"]]# "pp38", "pp39"]
+    env:
+      IS_32_BIT: ${{ matrix.buildplat[1] == 'win32' }}
+      IS_PUSH: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') }}
+      IS_SCHEDULE_DISPATCH: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
+    steps:
+      - name: Checkout pandas
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+          # versioneer.py requires the latest tag to be reachable. Here we
+          # fetch the complete history to get access to the tags.
+          # A shallow clone can work when the following issue is resolved:
+          # https://github.com/actions/checkout/issues/338
+          fetch-depth: 0
+
+      - name: Build wheels
+        uses: pypa/cibuildwheel@v2.9.0
+        env:
+          CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
+          CIBW_ENVIRONMENT: IS_32_BIT='${{ env.IS_32_BIT }}'
+          # We can't test directly with cibuildwheel, since we need to have the wheel location
+          # to mount into the docker image
+          CIBW_TEST_COMMAND_LINUX: "python {project}/ci/test_wheels.py"
+          CIBW_TEST_COMMAND_MACOS: "python {project}/ci/test_wheels.py"
+          CIBW_TEST_REQUIRES: hypothesis==6.52.1 pytest>=6.2.5 pytest-xdist pytest-asyncio>=0.17
+          CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: "python ci/fix_wheels.py {wheel} {dest_dir}"
+          CIBW_ARCHS_MACOS: x86_64 universal2
+          CIBW_BUILD_VERBOSITY: 3
+
+      # Used to push the built wheels
+      - uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.python[1] }}
+
+      - name: Test wheels (Windows 64-bit only)
+        if: ${{ matrix.buildplat[1] == 'win_amd64' }}
+        shell: cmd
+        run: |
+          python ci/test_wheels.py wheelhouse
+
+      - uses: actions/upload-artifact@v3
+        with:
+          name: ${{ matrix.python[0] }}-${{ startsWith(matrix.buildplat[1], 'macosx') && 'macosx' || matrix.buildplat[1] }}
+          path: ./wheelhouse/*.whl
+
+      - name: Upload wheels
+        if: success()
+        shell: bash
+        env:
+          PANDAS_STAGING_UPLOAD_TOKEN: ${{ secrets.PANDAS_STAGING_UPLOAD_TOKEN }}
+          PANDAS_NIGHTLY_UPLOAD_TOKEN: ${{ secrets.PANDAS_NIGHTLY_UPLOAD_TOKEN }}
+        run: |
+          source ci/upload_wheels.sh
+          set_upload_vars
+          # trigger an upload to
+          # https://anaconda.org/scipy-wheels-nightly/pandas
+          # for cron jobs or "Run workflow" (restricted to main branch).
+          # Tags will upload to
+          # https://anaconda.org/multibuild-wheels-staging/pandas
+          # The tokens were originally generated at anaconda.org
+          upload_wheels
+  build_sdist:
+    name: Build sdist
+    if: >-
+      github.event_name == 'schedule' ||
+      github.event_name == 'workflow_dispatch' ||
+      (github.event_name == 'pull_request' &&
+      contains(github.event.pull_request.labels.*.name, 'Build')) ||
+      (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && ( ! endsWith(github.ref, 'dev0')))
+    runs-on: ubuntu-latest
+    env:
+      IS_PUSH: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') }}
+      IS_SCHEDULE_DISPATCH: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
+    steps:
+      - name: Checkout pandas
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+          # versioneer.py requires the latest tag to be reachable. Here we
+          # fetch the complete history to get access to the tags.
+          # A shallow clone can work when the following issue is resolved:
+          # https://github.com/actions/checkout/issues/338
+          fetch-depth: 0
+      # Used to push the built wheels
+      - uses: actions/setup-python@v3
+        with:
+          # Build sdist on lowest supported Python
+          python-version: '3.8'
+      - name: Build sdist
+        run: |
+          pip install build
+          python -m build --sdist
+      - name: Test the sdist
+        run: |
+          # TODO: Don't run test suite, and instead build wheels from sdist
+          # by splitting the wheel builders into a two stage job
+          # (1. Generate sdist 2. Build wheels from sdist)
+          # This tests the sdists, and saves some build time
+          python -m pip install dist/*.gz
+          pip install hypothesis==6.52.1 pytest>=6.2.5 pytest-xdist pytest-asyncio>=0.17
+          cd .. # Not a good idea to test within the src tree
+          python -c "import pandas; print(pandas.__version__);
+          pandas.test(extra_args=['-m not clipboard and not single_cpu', '--skip-slow', '--skip-network', '--skip-db', '-n=2']);
+          pandas.test(extra_args=['-m not clipboard and single_cpu', '--skip-slow', '--skip-network', '--skip-db'])"
+      - uses: actions/upload-artifact@v3
+        with:
+          name: sdist
+          path: ./dist/*
+
+      - name: Upload sdist
+        if: success()
+        shell: bash
+        env:
+          PANDAS_STAGING_UPLOAD_TOKEN: ${{ secrets.PANDAS_STAGING_UPLOAD_TOKEN }}
+          PANDAS_NIGHTLY_UPLOAD_TOKEN: ${{ secrets.PANDAS_NIGHTLY_UPLOAD_TOKEN }}
+        run: |
+          source ci/upload_wheels.sh
+          set_upload_vars
+          # trigger an upload to
+          # https://anaconda.org/scipy-wheels-nightly/pandas
+          # for cron jobs or "Run workflow" (restricted to main branch).
+          # Tags will upload to
+          # https://anaconda.org/multibuild-wheels-staging/pandas
+          # The tokens were originally generated at anaconda.org
+          upload_wheels
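For context, the buildplat/python matrix above works around GitHub Actions' lack of paired matrix values by expanding every combination and letting cibuildwheel pick one build per job via the CIBW_BUILD selector. A minimal Python sketch (illustrative only, not part of the workflow) of the selectors this expansion produces:

    # Illustrative sketch: enumerate the matrix combinations and the
    # cibuildwheel CIBW_BUILD selector each job would receive.
    buildplat = [
        ("ubuntu-20.04", "manylinux_x86_64"),
        ("macos-11", "macosx_*"),
        ("windows-2019", "win_amd64"),
        ("windows-2019", "win32"),
    ]
    python = [("cp38", "3.8"), ("cp39", "3.9"), ("cp310", "3.10"), ("cp311", "3.11-dev")]

    for runner, platform_tag in buildplat:
        for build_tag, _interpreter in python:
            # e.g. "CIBW_BUILD=cp39-manylinux_x86_64" on ubuntu-20.04
            print(f"{runner}: CIBW_BUILD={build_tag}-{platform_tag}")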

.pre-commit-config.yaml
Lines changed: 12 additions & 2 deletions

@@ -53,6 +53,8 @@ repos:
     rev: 5.0.4
     hooks:
     - id: flake8
+      # Need to patch os.remove rule in pandas-dev-flaker
+      exclude: ^ci/fix_wheels.py
       additional_dependencies: &flake8_dependencies
       - flake8==5.0.4
       - flake8-bugbear==22.7.1
@@ -236,6 +238,14 @@ repos:
       entry: python scripts/validate_min_versions_in_sync.py
       language: python
      files: ^(ci/deps/actions-.*-minimum_versions\.yaml|pandas/compat/_optional\.py)$
+    - id: validate-errors-locations
+      name: Validate errors locations
+      description: Validate errors are in appropriate locations.
+      entry: python scripts/validate_exception_location.py
+      language: python
+      files: ^pandas/
+      exclude: ^(pandas/_libs/|pandas/tests/|pandas/errors/__init__.py$|pandas/_version.py)
+      types: [python]
     - id: flake8-pyi
       name: flake8-pyi
       entry: flake8 --extend-ignore=E301,E302,E305,E701,E704
@@ -258,10 +268,10 @@ repos:
          |/_testing/
     - id: autotyping
       name: autotyping
-      entry: python -m libcst.tool codemod autotyping.AutotypeCommand --none-return --scalar-return --annotate-magics --annotate-imprecise-magics
+      entry: python -m libcst.tool codemod autotyping.AutotypeCommand --none-return --scalar-return --annotate-magics --annotate-imprecise-magics --bool-param --bytes-param --str-param --float-param
       types_or: [python, pyi]
       files: ^pandas
-      exclude: ^(pandas/tests|pandas/io/clipboard)
+      exclude: ^(pandas/tests|pandas/_version.py|pandas/io/clipboard)
       language: python
       additional_dependencies:
       - autotyping==22.9.0
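For context, the extra autotyping flags (--bool-param, --bytes-param, --str-param, --float-param) let the codemod annotate parameters whose defaults are literals of those types. A hypothetical before/after sketch; the function name and parameters below are invented for illustration, not taken from pandas:

    # Hypothetical example of the rewrite the new flags enable.
    # Before the codemod:
    def resample_frame(rule="D", copy=True, fill_value=0.0):
        pass

    # After the codemod, parameters with literal defaults gain matching annotations:
    def resample_frame(rule: str = "D", copy: bool = True, fill_value: float = 0.0):
        pass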

README.md
Lines changed: 1 addition & 0 deletions

@@ -11,6 +11,7 @@
 [![Package Status](https://img.shields.io/pypi/status/pandas.svg)](https://pypi.org/project/pandas/)
 [![License](https://img.shields.io/pypi/l/pandas.svg)](https://github.com/pandas-dev/pandas/blob/main/LICENSE)
 [![Coverage](https://codecov.io/github/pandas-dev/pandas/coverage.svg?branch=main)](https://codecov.io/gh/pandas-dev/pandas)
+[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/pandas-dev/pandas/badge)](https://api.securityscorecards.dev/projects/github.com/pandas-dev/pandas)
 [![Downloads](https://static.pepy.tech/personalized-badge/pandas?period=month&units=international_system&left_color=black&right_color=orange&left_text=PyPI%20downloads%20per%20month)](https://pepy.tech/project/pandas)
 [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/pydata/pandas)
 [![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](https://numfocus.org)

asv_bench/benchmarks/ctors.py
Lines changed: 25 additions & 0 deletions

@@ -6,6 +6,7 @@
     MultiIndex,
     Series,
     Timestamp,
+    date_range,
 )

 from .pandas_vb_common import tm
@@ -121,4 +122,28 @@ def time_multiindex_from_iterables(self):
         MultiIndex.from_product(self.iterables)


+class DatetimeIndexConstructor:
+    def setup(self):
+
+        N = 20_000
+        dti = date_range("1900-01-01", periods=N)
+
+        self.list_of_timestamps = dti.tolist()
+        self.list_of_dates = dti.date.tolist()
+        self.list_of_datetimes = dti.to_pydatetime().tolist()
+        self.list_of_str = dti.strftime("%Y-%m-%d").tolist()
+
+    def time_from_list_of_timestamps(self):
+        DatetimeIndex(self.list_of_timestamps)
+
+    def time_from_list_of_dates(self):
+        DatetimeIndex(self.list_of_dates)
+
+    def time_from_list_of_datetimes(self):
+        DatetimeIndex(self.list_of_datetimes)
+
+    def time_from_list_of_str(self):
+        DatetimeIndex(self.list_of_str)
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
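For context, the new DatetimeIndexConstructor benchmark times DatetimeIndex construction from four list flavours derived from the same date_range. A small standalone sketch of those inputs (a much shorter length than the benchmark, for illustration only):

    # Standalone sketch of the four input types the benchmark exercises.
    import pandas as pd

    dti = pd.date_range("1900-01-01", periods=5)
    inputs = {
        "timestamps": dti.tolist(),                    # pandas.Timestamp objects
        "dates": dti.date.tolist(),                    # datetime.date objects
        "datetimes": dti.to_pydatetime().tolist(),     # datetime.datetime objects
        "strings": dti.strftime("%Y-%m-%d").tolist(),  # ISO-format strings
    }
    for label, values in inputs.items():
        print(label, pd.DatetimeIndex(values)[:2])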

asv_bench/benchmarks/index_object.py
Lines changed: 18 additions & 8 deletions

@@ -19,28 +19,38 @@
 class SetOperations:

     params = (
-        ["datetime", "date_string", "int", "strings"],
+        ["monotonic", "non_monotonic"],
+        ["datetime", "date_string", "int", "strings", "ea_int"],
         ["intersection", "union", "symmetric_difference"],
     )
-    param_names = ["dtype", "method"]
+    param_names = ["index_structure", "dtype", "method"]

-    def setup(self, dtype, method):
+    def setup(self, index_structure, dtype, method):
         N = 10**5
         dates_left = date_range("1/1/2000", periods=N, freq="T")
         fmt = "%Y-%m-%d %H:%M:%S"
         date_str_left = Index(dates_left.strftime(fmt))
         int_left = Index(np.arange(N))
+        ea_int_left = Index(np.arange(N), dtype="Int64")
         str_left = tm.makeStringIndex(N)
+
         data = {
-            "datetime": {"left": dates_left, "right": dates_left[:-1]},
-            "date_string": {"left": date_str_left, "right": date_str_left[:-1]},
-            "int": {"left": int_left, "right": int_left[:-1]},
-            "strings": {"left": str_left, "right": str_left[:-1]},
+            "datetime": dates_left,
+            "date_string": date_str_left,
+            "int": int_left,
+            "strings": str_left,
+            "ea_int": ea_int_left,
         }
+
+        if index_structure == "non_monotonic":
+            data = {k: mi[::-1] for k, mi in data.items()}
+
+        data = {k: {"left": idx, "right": idx[:-1]} for k, idx in data.items()}
+
         self.left = data[dtype]["left"]
         self.right = data[dtype]["right"]

-    def time_operation(self, dtype, method):
+    def time_operation(self, index_structure, dtype, method):
         getattr(self.left, method)(self.right)
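For context, the reworked SetOperations benchmark adds an extension-array-backed Int64 index ("ea_int") and a non-monotonic variant built by reversing each index before the set operation. A small standalone sketch of the new cases being timed (tiny size, illustration only):

    # Standalone sketch of the new cases: Int64 (extension array) dtype and
    # a non-monotonic left operand produced by reversing the index.
    import numpy as np
    import pandas as pd

    left = pd.Index(np.arange(10), dtype="Int64")
    right = left[:-1]

    print(left.union(right))               # monotonic, ea_int
    print(left[::-1].intersection(right))  # non_monotonic, ea_int
    print(left[::-1].symmetric_difference(right))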
