Skip to content

Commit 1e0eeed

Browse files
committed
Fixed merge conflict
2 parents a1eaa52 + 4fdd8b5 commit 1e0eeed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

72 files changed

+453
-280
lines changed

.github/CODEOWNERS

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# github
2+
.github/ @mroeschke
3+
4+
# ci
5+
ci/ @mroeschke
6+
7+
# web
8+
web/ @datapythonista
9+
10+
# docs
11+
doc/cheatsheet @Dr-Irv
12+
13+
# pandas
14+
pandas/_libs/ @WillAyd
15+
pandas/_libs/tslibs/* @MarcoGorelli
16+
pandas/_typing.py @Dr-Irv
17+
pandas/core/groupby/* @rhshadrach
18+
pandas/core/tools/datetimes.py @MarcoGorelli
19+
pandas/io/excel/* @rhshadrach
20+
pandas/io/formats/style.py @attack68
21+
pandas/io/formats/style_render.py @attack68
22+
pandas/io/formats/templates @attack68

.pre-commit-config.yaml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,11 +65,7 @@ repos:
6565
rev: 1.6.1
6666
hooks:
6767
- id: cpplint
68-
# We don't lint all C files because we don't want to lint any that are built
69-
# from Cython files nor do we want to lint C files that we didn't modify for
70-
# this particular codebase (e.g. src/headers, src/klib). However,
71-
# we can lint all header files since they aren't "generated" like C files are.
72-
exclude: ^pandas/_libs/src/(klib|headers)/
68+
exclude: ^pandas/_libs/include/pandas/vendored/klib
7369
args: [
7470
--quiet,
7571
'--extensions=c,h',

MANIFEST.in

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,4 @@ prune pandas/tests/io/parser/data
5757

5858
# Selectively re-add *.cxx files that were excluded above
5959
graft pandas/_libs/src
60-
graft pandas/_libs/tslibs/src
61-
include pandas/_libs/pd_parser.h
62-
include pandas/_libs/pd_parser.c
60+
graft pandas/_libs/include

asv_bench/benchmarks/join_merge.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,38 @@ def time_i8merge(self, how):
324324
merge(self.left, self.right, how=how)
325325

326326

327+
class MergeDatetime:
328+
params = [
329+
[
330+
("ns", "ns"),
331+
("ms", "ms"),
332+
("ns", "ms"),
333+
],
334+
[None, "Europe/Brussels"],
335+
]
336+
param_names = ["units", "tz"]
337+
338+
def setup(self, units, tz):
339+
unit_left, unit_right = units
340+
N = 10_000
341+
keys = Series(date_range("2012-01-01", freq="T", periods=N, tz=tz))
342+
self.left = DataFrame(
343+
{
344+
"key": keys.sample(N * 10, replace=True).dt.as_unit(unit_left),
345+
"value1": np.random.randn(N * 10),
346+
}
347+
)
348+
self.right = DataFrame(
349+
{
350+
"key": keys[:8000].dt.as_unit(unit_right),
351+
"value2": np.random.randn(8000),
352+
}
353+
)
354+
355+
def time_merge(self, units, tz):
356+
merge(self.left, self.right)
357+
358+
327359
class MergeCategoricals:
328360
def setup(self):
329361
self.left_object = DataFrame(

ci/code_checks.sh

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -274,16 +274,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
274274
pandas.TimedeltaIndex.as_unit \
275275
pandas.TimedeltaIndex.to_pytimedelta \
276276
pandas.TimedeltaIndex.mean \
277-
pandas.PeriodIndex.is_leap_year \
278-
pandas.PeriodIndex.minute \
279-
pandas.PeriodIndex.month \
280-
pandas.PeriodIndex.quarter \
281-
pandas.PeriodIndex.second \
282-
pandas.PeriodIndex.week \
283-
pandas.PeriodIndex.weekday \
284-
pandas.PeriodIndex.weekofyear \
285-
pandas.PeriodIndex.year \
286-
pandas.PeriodIndex.to_timestamp \
287277
pandas.core.window.rolling.Rolling.max \
288278
pandas.core.window.rolling.Rolling.cov \
289279
pandas.core.window.rolling.Rolling.skew \

doc/source/whatsnew/v2.0.3.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,15 @@ including other versions of pandas.
1313

1414
Fixed regressions
1515
~~~~~~~~~~~~~~~~~
16+
- Fixed performance regression in merging on datetime-like columns (:issue:`53231`)
1617
-
1718

1819
.. ---------------------------------------------------------------------------
1920
.. _whatsnew_203.bug_fixes:
2021

2122
Bug fixes
2223
~~~~~~~~~
23-
-
24+
- Bug in :func:`read_csv` when defining ``dtype`` with ``bool[pyarrow]`` for the ``"c"`` and ``"python"`` engines (:issue:`53390`)
2425

2526
.. ---------------------------------------------------------------------------
2627
.. _whatsnew_203.other:

doc/source/whatsnew/v2.1.0.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,7 @@ Numeric
347347
^^^^^^^
348348
- Bug in :class:`RangeIndex` setting ``step`` incorrectly when being the subtrahend with minuend a numeric value (:issue:`53255`)
349349
- Bug in :meth:`Series.corr` and :meth:`Series.cov` raising ``AttributeError`` for masked dtypes (:issue:`51422`)
350+
- Bug when calling :meth:`Series.kurt` and :meth:`Series.skew` on numpy data of all zeros returning a python type instead of a numpy type (:issue:`53482`)
350351
- Bug in :meth:`Series.mean`, :meth:`DataFrame.mean` with object-dtype values containing strings that can be converted to numbers (e.g. "2") returning incorrect numeric results; these now raise ``TypeError`` (:issue:`36703`, :issue:`44008`)
351352
- Bug in :meth:`DataFrame.corrwith` raising ``NotImplementedError`` for pyarrow-backed dtypes (:issue:`52314`)
352353
- Bug in :meth:`DataFrame.size` and :meth:`Series.size` returning 64-bit integer instead of int (:issue:`52897`)
@@ -403,6 +404,7 @@ I/O
403404
- Bug in :func:`read_hdf` not properly closing store after an ``IndexError`` is raised (:issue:`52781`)
404405
- Bug in :func:`read_html`, style elements were read into DataFrames (:issue:`52197`)
405406
- Bug in :func:`read_html`, tail texts were removed together with elements containing ``display:none`` style (:issue:`51629`)
407+
- Bug in :func:`read_sql` when reading multiple timezone aware columns with the same column name (:issue:`44421`)
406408
- Bug when writing and reading empty Stata dta files where dtype information was lost (:issue:`46240`)
407409

408410
Period
@@ -440,6 +442,7 @@ Groupby/resample/rolling
440442
Reshaping
441443
^^^^^^^^^
442444
- Bug in :func:`crosstab` when ``dropna=False`` would not keep ``np.nan`` in the result (:issue:`10772`)
445+
- Bug in :func:`merge_asof` raising ``KeyError`` for extension dtypes (:issue:`52904`)
443446
- Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` on non-unique columns would return incorrect type when dict-like argument was passed in (:issue:`51099`)
444447
- Bug in :meth:`DataFrame.idxmin` and :meth:`DataFrame.idxmax`, where the axis dtype would be lost for empty frames (:issue:`53265`)
445448
- Bug in :meth:`DataFrame.merge` not merging correctly when having ``MultiIndex`` with single level (:issue:`52331`)

pandas/_libs/groupby.pyx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ cdef float64_t median_linear_mask(float64_t* a, int n, uint8_t* mask) noexcept n
9191
a = tmp
9292
n -= na_count
9393

94-
result = calc_median_linear(a, n, na_count)
94+
result = calc_median_linear(a, n)
9595

9696
if na_count:
9797
free(a)
@@ -128,15 +128,15 @@ cdef float64_t median_linear(float64_t* a, int n) noexcept nogil:
128128
a = tmp
129129
n -= na_count
130130

131-
result = calc_median_linear(a, n, na_count)
131+
result = calc_median_linear(a, n)
132132

133133
if na_count:
134134
free(a)
135135

136136
return result
137137

138138

139-
cdef float64_t calc_median_linear(float64_t* a, int n, int na_count) noexcept nogil:
139+
cdef float64_t calc_median_linear(float64_t* a, int n) noexcept nogil:
140140
cdef:
141141
float64_t result
142142

pandas/_libs/tslibs/src/datetime/pd_datetime.h renamed to pandas/_libs/include/pandas/datetime/pd_datetime.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ See NUMPY_LICENSE.txt for the license.
2222
#endif // NPY_NO_DEPRECATED_API
2323

2424
#include <numpy/ndarraytypes.h>
25-
#include "np_datetime.h"
26-
#include "np_datetime_strings.h"
27-
#include "date_conversions.h"
25+
#include "pandas/vendored/numpy/datetime/np_datetime.h"
26+
#include "pandas/vendored/numpy/datetime/np_datetime_strings.h"
27+
#include "pandas/datetime/date_conversions.h"
2828

2929
#ifdef __cplusplus
3030
extern "C" {

pandas/_libs/pd_parser.h renamed to pandas/_libs/include/pandas/parser/pd_parser.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ extern "C" {
1414

1515
#define PY_SSIZE_T_CLEAN
1616
#include <Python.h>
17-
#include "src/parser/tokenizer.h"
17+
#include "pandas/parser/tokenizer.h"
1818

1919
typedef struct {
2020
int (*to_double)(char *, double *, char, char, int *);

pandas/_libs/src/parser/tokenizer.h renamed to pandas/_libs/include/pandas/parser/tokenizer.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@ See LICENSE for the license
1919
#define ERROR_INVALID_CHARS 3
2020

2121
#include <stdint.h>
22-
#include "../inline_helper.h"
23-
#include "../headers/portable.h"
22+
#include "pandas/inline_helper.h"
23+
#include "pandas/portable.h"
2424

25-
#include "khash.h"
25+
#include "pandas/vendored/klib/khash.h"
2626

2727
#define STREAM_INIT_SIZE 32
2828

pandas/_libs/src/headers/portable.h renamed to pandas/_libs/include/pandas/portable.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,18 @@
1+
/*
2+
Copyright (c) 2016, PyData Development Team
3+
All rights reserved.
4+
5+
Distributed under the terms of the BSD Simplified License.
6+
7+
The full license is in the LICENSE file, distributed with this software.
8+
*/
9+
110
#pragma once
211

312
#include <string.h>
413

514
#if defined(_MSC_VER)
6-
#define strcasecmp( s1, s2 ) _stricmp( s1, s2 )
15+
#define strcasecmp(s1, s2) _stricmp(s1, s2)
716
#endif
817

918
// GH-23516 - works around locale perf issues

pandas/_libs/src/skiplist.h renamed to pandas/_libs/include/pandas/skiplist.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ Python recipe (https://rhettinger.wordpress.com/2010/02/06/lost-knowledge/)
1919
#include <stdio.h>
2020
#include <stdlib.h>
2121
#include <string.h>
22-
#include "inline_helper.h"
22+
#include "pandas/inline_helper.h"
2323

2424
PANDAS_INLINE float __skiplist_nanf(void) {
2525
const union {

pandas/_libs/src/klib/khash.h renamed to pandas/_libs/include/pandas/vendored/klib/khash.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ int main() {
112112
#include <stdlib.h>
113113
#include <string.h>
114114
#include <limits.h>
115-
#include "../inline_helper.h"
115+
#include "pandas/inline_helper.h"
116116

117117

118118
// hooks for memory allocator, C-runtime allocator used per default

pandas/_libs/src/ujson/lib/ultrajson.h renamed to pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ tree doesn't have cyclic references.
5353

5454
#include <stdio.h>
5555
#include <wchar.h>
56-
#include "../../headers/portable.h"
56+
#include "pandas/portable.h"
5757

5858
// Don't output any extra whitespaces when encoding
5959
#define JSON_NO_EXTRA_WHITESPACE

pandas/_libs/khash.pxd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ from numpy cimport (
1515
)
1616

1717

18-
cdef extern from "khash_python.h":
18+
cdef extern from "pandas/vendored/klib/khash_python.h":
1919
const int KHASH_TRACE_DOMAIN
2020

2121
ctypedef uint32_t khuint_t

pandas/_libs/khash_for_primitive_helper.pxi.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ primitive_types = [('int64', 'int64_t'),
2424

2525
{{for name, c_type in primitive_types}}
2626

27-
cdef extern from "khash_python.h":
27+
cdef extern from "pandas/vendored/klib/khash_python.h":
2828
ctypedef struct kh_{{name}}_t:
2929
khuint_t n_buckets, size, n_occupied, upper_bound
3030
uint32_t *flags

pandas/_libs/lib.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ cdef extern from "numpy/arrayobject.h":
9393
cdef extern from "numpy/ndarrayobject.h":
9494
bint PyArray_CheckScalar(obj) nogil
9595

96-
cdef extern from "pd_parser.h":
96+
cdef extern from "pandas/parser/pd_parser.h":
9797
int floatify(object, float64_t *result, int *maybe_int) except -1
9898
void PandasParser_IMPORT()
9999

pandas/_libs/meson.build

Lines changed: 18 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -61,53 +61,41 @@ subdir('tslibs')
6161
libs_sources = {
6262
# Dict of extension name -> dict of {sources, include_dirs, and deps}
6363
# numpy include dir is implicitly included
64-
'algos': {'sources': ['algos.pyx', _algos_common_helper, _algos_take_helper, _khash_primitive_helper],
65-
'include_dirs': klib_include},
64+
'algos': {'sources': ['algos.pyx', _algos_common_helper, _algos_take_helper, _khash_primitive_helper]},
6665
'arrays': {'sources': ['arrays.pyx']},
6766
'groupby': {'sources': ['groupby.pyx']},
6867
'hashing': {'sources': ['hashing.pyx']},
69-
'hashtable': {'sources': ['hashtable.pyx', _khash_primitive_helper, _hashtable_class_helper, _hashtable_func_helper],
70-
'include_dirs': klib_include},
71-
'index': {'sources': ['index.pyx', _index_class_helper],
72-
'include_dirs': [klib_include, 'tslibs']},
68+
'hashtable': {'sources': ['hashtable.pyx', _khash_primitive_helper, _hashtable_class_helper, _hashtable_func_helper]},
69+
'index': {'sources': ['index.pyx', _index_class_helper]},
7370
'indexing': {'sources': ['indexing.pyx']},
7471
'internals': {'sources': ['internals.pyx']},
75-
'interval': {'sources': ['interval.pyx', _intervaltree_helper],
76-
'include_dirs': [klib_include, 'tslibs']},
72+
'interval': {'sources': ['interval.pyx', _intervaltree_helper]},
7773
'join': {'sources': ['join.pyx', _khash_primitive_helper],
78-
'include_dirs': klib_include,
7974
'deps': _khash_primitive_helper_dep},
80-
'lib': {'sources': ['lib.pyx', 'src/parser/tokenizer.c'],
81-
'include_dirs': [klib_include, inc_datetime]},
82-
'missing': {'sources': ['missing.pyx'],
83-
'include_dirs': [inc_datetime]},
84-
'pandas_datetime': {'sources': ['tslibs/src/datetime/np_datetime.c',
85-
'tslibs/src/datetime/np_datetime_strings.c',
86-
'tslibs/src/datetime/date_conversions.c',
87-
'tslibs/src/datetime/pd_datetime.c']},
88-
#'include_dirs':
75+
'lib': {'sources': ['lib.pyx', 'src/parser/tokenizer.c']},
76+
'missing': {'sources': ['missing.pyx']},
77+
'pandas_datetime': {'sources': ['src/vendored/numpy/datetime/np_datetime.c',
78+
'src/vendored/numpy/datetime/np_datetime_strings.c',
79+
'src/datetime/date_conversions.c',
80+
'src/datetime/pd_datetime.c']},
8981
'pandas_parser': {'sources': ['src/parser/tokenizer.c',
9082
'src/parser/io.c',
91-
'pd_parser.c'],
92-
'include_dirs': [klib_include]},
83+
'src/parser/pd_parser.c']},
9384
'parsers': {'sources': ['parsers.pyx', 'src/parser/tokenizer.c', 'src/parser/io.c'],
94-
'include_dirs': [klib_include, 'src'],
9585
'deps': _khash_primitive_helper_dep},
96-
'json': {'sources': ['src/ujson/python/ujson.c',
97-
'src/ujson/python/objToJSON.c',
98-
'src/ujson/python/JSONtoObj.c',
99-
'src/ujson/lib/ultrajsonenc.c',
100-
'src/ujson/lib/ultrajsondec.c'],
101-
'include_dirs': ['tslibs/src/datetime', 'src/ujson/lib', 'src/ujson/python']},
86+
'json': {'sources': ['src/vendored/ujson/python/ujson.c',
87+
'src/vendored/ujson/python/objToJSON.c',
88+
'src/vendored/ujson/python/JSONtoObj.c',
89+
'src/vendored/ujson/lib/ultrajsonenc.c',
90+
'src/vendored/ujson/lib/ultrajsondec.c']},
10291
'ops': {'sources': ['ops.pyx']},
10392
'ops_dispatch': {'sources': ['ops_dispatch.pyx']},
10493
'properties': {'sources': ['properties.pyx']},
10594
'reshape': {'sources': ['reshape.pyx']},
10695
'sas': {'sources': ['sas.pyx']},
10796
'byteswap': {'sources': ['byteswap.pyx']},
10897
'sparse': {'sources': ['sparse.pyx', _sparse_op_helper]},
109-
'tslib': {'sources': ['tslib.pyx'],
110-
'include_dirs': inc_datetime},
98+
'tslib': {'sources': ['tslib.pyx']},
11199
'testing': {'sources': ['testing.pyx']},
112100
'writers': {'sources': ['writers.pyx']}
113101
}
@@ -118,7 +106,7 @@ foreach ext_name, ext_dict : libs_sources
118106
ext_name,
119107
ext_dict.get('sources'),
120108
cython_args: ['--include-dir', meson.current_build_dir()],
121-
include_directories: [inc_np] + ext_dict.get('include_dirs', ''),
109+
include_directories: [inc_np, inc_pd],
122110
dependencies: ext_dict.get('deps', ''),
123111
subdir: 'pandas/_libs',
124112
install: true

0 commit comments

Comments
 (0)