From 4798ccb86eabdf1525976e181973a77cc90dc0a0 Mon Sep 17 00:00:00 2001 From: joelchen Date: Sun, 27 Nov 2022 02:35:37 -0500 Subject: [PATCH 1/6] fix left_sorter to support duplicate left values --- pandas/_libs/intervaltree.pxi.in | 4 +++- pandas/_libs/intervaltree.pxi.pdf | Bin 0 -> 15295 bytes 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 pandas/_libs/intervaltree.pxi.pdf diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index e7a310513d2fa..3b456a8f1b21b 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -81,7 +81,9 @@ cdef class IntervalTree(IntervalMixin): """How to sort the left labels; this is used for binary search """ if self._left_sorter is None: - self._left_sorter = np.argsort(self.left) + left_right = np.asarray([(self.left[i], self.right[i]) for i in range(0, + len(self.left))], dtype=[('l', self.dtype), ('r', self.dtype)]) + self._left_sorter = np.argsort(left_right, order=('l', 'r')) return self._left_sorter @property diff --git a/pandas/_libs/intervaltree.pxi.pdf b/pandas/_libs/intervaltree.pxi.pdf new file mode 100644 index 0000000000000000000000000000000000000000..3b456a8f1b21ba89320ab356b5acd479d274879b GIT binary patch literal 15295 zcmeHOTW=f5m44^1C|C$cArx&-vU$*2M!dF@2${8K$KDJUTA|S*i&C$f-As4WvPNKj z`+na!b!#>$U)FhB0y8GNPn|mVTa_L?dNg`(SL-tGtXbBLDXPvkAM>(ntR0PhdGYg` z^EZD!HLu>9H*en?yC^y{TYoCdpU!`LZ5Bmi=UsU>)n`R(u5D!-;98hvQ?KOltf)qh zu+2!Gtn+G-x3jD)uG(2%Ue~QDR_nUyOwnemx~jXnD(0inyt`Z5gTm9_J{ z%$uw`9T|M^s){BuG*zb6)0c+uLE{xBK_vQ&AnOY@x0fW^PG{%3e&us7+A#|CUoJ^>S&t8*8Y>rs!@~ zW?|cTQ(W;~Wqv6>6wqOjE7F^;Teqi=AKPkn>)*`k=K3*zJ@#Z|ag$qMZmhAZ4Xm@M zu4S+DYGFUUuC9y9s;w^bxfPAj#z>nm+Sq(iSLK~<&BrjirpfP2X_wv9G{yCe+793O z&EM1uYnr-_Z|bZ6frXm|tx>QOyzLrDvW5u4BL|nki0hBGDf9Ij6i-dY?Xnj3Z5#ZO zZQakX_NTk7ZHr%R)|srzvw6L#x{Dh9;%qEv$9$PDGom_s z`s9hzB?f<%UOzl+3BA)-xx*i)vA<&YoTdK%)E+JR115O4VF} zfki9P#OCvx5K?C5McZyxFpjP^^SbKtqB2$8L-i(q)6T*Z6*368ACn+(7+4)&)!hvr zR2G(JoAGvP>UCGtRbGbmB<`0R%lBeDg>$V|49j7MrP7GG!AV;ztTPVxo!j*AAx63_ zXXce%=9{u>$&jNg5+9TS`myl=@`5@*MBa&5n!0Sw?TuBs=ntm7ExP%Qg-{t)=df;Z zhzj-^)ktnZ$W^oH>E+ginqGH{u+90C^N4=vgyyaRqB^3$14KdBNo@ zVrRkreWw~L&LYm8WrxVww=nQf!0qNxzp~ln)io)rlvBTQ3@iJ?@5hN~gb1glGas@7 z(j)WdznC9>_q%`j;ndiw-5}OhwW;!o+U%RwDsMkv^M63dMGFWCMzg-0WwQ{#08Ik9 z@YtLf{yT&7Pj;l?geYtI{eploy2x`cvG`*LDoV$FbMFJEWG0)#tIklxyn3Cz{PD%R zcdy@FAdo93qWohYPbABKA-|<_aI?YU| zz08}svT7R-1Hw^MiU~gAv4ZNt|>C{5M=@ZxLMfdO!olR5>s|tf?_Npk|X1|V<`yO16eR(gh4=|f9J;ZIlsEHPPRo7)SnUsT9EHGA5qiKog zD=ZeZKKHjKDV1O_Sc2%Er2m(w9*|%1)+we>$HBCuGU)?TddyQpEAOekzbNLIBlGM` zQ!?uT!!n`u>8~x2BN0Fs>l!@ZF#@P#ENHi0OFT44nnInA$!A9E7z1xc3F68=n)Q*# zA#1uE-d`i*Zkh&Vc@UO)#*O)A-Pn&sy+JqSB-Y%(Owmvp|Y&YNxk;` zOan8=v%I>Sz@Y=zpH=J7siFMPfTHMTl?B~ie~G>StnuT0vq4v~$Kqjq0W~z_QJV>S z6l4Fs$x8@75T8K6BQ~~0Y7z_i6Z|9gA*siC%Zds3xp@+ulP2Xv(#N6rVV$ZH(Tv`4|MaUjO3s z(!dM>@KBC);%6DZuG@lL0Q|6que9qLX_>uaZ8lYbIyX27s&-b~yRM2#`$iJ&b@34; zxHgS+5uwNtlLRR$A@F`=-oJhI_SD>>9kwZ3V9$lO0LwnLub`w6tjh%wbQOT9zvOk% zdvVUX?{DZ+Q2=P|8Y$A>&~E~i6k%>3ynhl<$`}Y@atWNt+NmIEDM?V*B>goX@FT9v z9(pEC#Y731I>E6*2pbXHewy2LXWpV|#uVpP`hdPv2&K`G08%N{s3*}6O%aj{Hkjz= zN_PBr<_00IL?lCbRNwYCBAPX(A5k=X0=H2DXN$bcXO%fSGfpN$DjXzwdj#Fd$cub2qDIQmRtuKYaw?H~lWwhKhhSsq0l3zG zqLO>O1{mkD?qyeJ+D0L$w$cN!u~_@QV#i;zs;)BiED4l*T{p50Z}~E34ZQTc)X+Nv z-Sqc|y_EU>#QA-U44f)8c8w<5F7{Uu|4iSUnpJ^$PjwwFQ6+!SNge#b_ffz9@5KK{ zC#K6danGEe7vJPeF7C|^debzRv3FCy@4+RmyrBnX84S2_Q$b0M=w+juIgg%9sOQ7t7-;ChKG=lK_>iCFqi6S0 zKrk*G&ocBTE1P9o%=0BuL@$qV#!Mg;Ob+t$__?0vC@y1S7-lQfC!vyQ7EDa=N-s(# zXK{xfaa9;=3B1lg9$uiL^*cnom~Fbv`9(1Gw)zI)^x0NHu8WVDUZ~jQJB~XZ5aK=* z6U22&W-H@IEO+E?_1H!pD3@E`6!8pes0QIr;w(?9S`DyKA3olZ@bju&vr?5aEle7C zq#=_S7@*fHW|zxNDPzNhz1m#kXz$vrHx1%ya=4LV{bG~cv9hq`(12FZY?w-_6hv4q zctzVI)ulIsXlTk{(pV9W5p&nT_5H2982_xEeGZX-p8-v4qWOX;j^{4U1s zM4bn05^DHG56;K#5Qhs~UN`gATBL=nk8k1xf+h$YGgnw1kvuz-|BsJjHGl~(Ph<7D zg7n_{lRmfftiRTu|8b#1og6cH%5bV$p~b|I4I{T_WA=Ui6God`kJB_s5amX+!TNa;4AV9K6g=J$o#6xv_4w2ka_$($-_UBMV zEMhgjI-a<4PwDH5uC$Xxdvt^N3HBlfhqKQ?d#eiih?w{-XeXM|5I|hiEB_%t(Phx{ zA;?ry^wRBiG)j+@puKxMy1x(pYJ{fs(P^z7)OOyY|38J$Eg{8-$*4U4%bkNz69e=WW?q!Z`-vel6P%EjeDI>*jTl zUlF@t!8-Hj%o#-y+*4+9OVq0jZ2nvx)1$#lF$w~R;!q1CBqmU9tf%dQ3PV}`^jb>5930T!7fDdCrIO1dT7Z1z8ddY*MNTqt z6VLr{i34Q@)$1}a~We!vl<|M5`_(_<#ke*Ezg1kGp%&$+RjfwA2{iw0hXAe_#~I(UwGxf^es+z@-r3|FQW2({AM!-=ouTA@n3yNQ!DH zZNp5E>{LU89bCga$2fY@izk?t5IFsKOHk_MRt3#Y=-IOmmx|@2+yK7s?|y?m@@M3Q z#xKBq8}lm$xB;%TMB^naQDhk3xOlH$T`=c#-!&3)f3sNL#p9&mIF6{ojisJ^gc#Y0 zQN-h{>}@9IAX!V8am!=$@a%l{UmtV>LwyYi1;W2<_D>>Oy=LHJ zhI29_W4UETd9CHl^;gO$C*4gxa>@LDu$9>^pqaAIwnIw=(ei0F8B2hHd%1hLg2?^cgG&c|E|B@g`U~3auoHPyIvpu-FQ$u7>VA(9?ISol zJU4e7hVeBgzq;-TG@ixTXV@CXoZG`Pt%fS&K_IxDlmYOzd#j z+!wf87djxlWg5l@oEijCImhua+ud&0E%zJ4Fk(A{W1sB=9nez~I~mE1Rr$t$_D5}B zK&I_PKS~{$x4ce>Y8`i;(4*aAWM0Fj=eR(eBLuTALssW2+iC-c)5sS0w=p~E|1AZN zOe_4I0PZ|7ExN10`u7A~vzg<{t3@YSwH>a`;4Y1{rKZHK%{*R3BR}XJ_3sFJGWe>j zKs26tnsLky{DqJ$29YKVpC`oJ_Xcy~>|e=|#NhOWC9&Yz*CoA2A)tR@lr%##|EEpp zlz86U@Gt_0wB90hCMCJ2AMcJ#S=S#74yY+>Ew)^z&FK|xl3io1E^lzC{xQddUN(*< z>4c=Bc=lkLbUnOp3v_%$QM!t>rzyc3B9#cqRfDo(pMi79eg*5w#B~YkKTt8RC@FRO zb=)_1zmWf4A%1@t5Y9J!E$>q>q4)s-2Q5jMW#_{|gbri&6C)Ce(EWRC8o7F8N>QTQ zKL2{HiGhEe+H}dc=ncn}Pp9TY{(D%5`@|UZx?gToz;{xjs6g3NjkzhMix5+ literal 0 HcmV?d00001 From 8542c22d7c6acf5ee006899bddc8c5fa03755229 Mon Sep 17 00:00:00 2001 From: joelchen Date: Sun, 27 Nov 2022 02:59:20 -0500 Subject: [PATCH 2/6] fix style --- pandas/_libs/intervaltree.pxi.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index 3b456a8f1b21b..e4b7f65990895 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -81,7 +81,7 @@ cdef class IntervalTree(IntervalMixin): """How to sort the left labels; this is used for binary search """ if self._left_sorter is None: - left_right = np.asarray([(self.left[i], self.right[i]) for i in range(0, + left_right = np.asarray([(self.left[i], self.right[i]) for i in range(0, len(self.left))], dtype=[('l', self.dtype), ('r', self.dtype)]) self._left_sorter = np.argsort(left_right, order=('l', 'r')) return self._left_sorter From 69fd497245bdf03b30e49616c04bc4442359f86d Mon Sep 17 00:00:00 2001 From: joelchen Date: Sun, 27 Nov 2022 03:33:19 -0500 Subject: [PATCH 3/6] add regression test case --- pandas/tests/indexes/interval/test_interval.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index c8d7470032e5f..0c990e8fc2435 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -791,6 +791,13 @@ def test_is_overlapping(self, start, shift, na_value, closed): result = index.is_overlapping assert result is expected + # intervales with duplicate left values + a = [10, 15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85] + b = [15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90] + index = IntervalIndex.from_arrays(a, b, closed="right") + result = index.is_overlapping + assert result is False + @pytest.mark.parametrize( "tuples", [ From 41ce4e57978e134fbbe84c813c7ab42efe9f83ee Mon Sep 17 00:00:00 2001 From: joelchen Date: Sun, 27 Nov 2022 04:02:22 -0500 Subject: [PATCH 4/6] add doc --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 97ee96d8be25d..a3448b7eb53a8 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -672,7 +672,7 @@ Strings Interval ^^^^^^^^ -- +- Bug in :meth:`IntervalIndex.is_overlapping` incorrect output if interval has duplicate left boundaries (:issue:`49581`) - Indexing From 2949448ead181a32dfd45bdfcc18a73c86be9e8f Mon Sep 17 00:00:00 2001 From: joelchen Date: Wed, 30 Nov 2022 02:08:10 -0500 Subject: [PATCH 5/6] change np.argsort to np.lexsort --- pandas/_libs/intervaltree.pxi.in | 5 ++--- pandas/_libs/intervaltree.pxi.pdf | Bin 15295 -> 15152 bytes .../tests/indexes/interval/test_interval.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index e4b7f65990895..0d7c96a6f2f2b 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -81,9 +81,8 @@ cdef class IntervalTree(IntervalMixin): """How to sort the left labels; this is used for binary search """ if self._left_sorter is None: - left_right = np.asarray([(self.left[i], self.right[i]) for i in range(0, - len(self.left))], dtype=[('l', self.dtype), ('r', self.dtype)]) - self._left_sorter = np.argsort(left_right, order=('l', 'r')) + values = [self.right, self.left] + self._left_sorter = np.lexsort(values) return self._left_sorter @property diff --git a/pandas/_libs/intervaltree.pxi.pdf b/pandas/_libs/intervaltree.pxi.pdf index 3b456a8f1b21ba89320ab356b5acd479d274879b..0d7c96a6f2f2b31f4e042ecccd02ae78002605d9 100644 GIT binary patch delta 28 kcmdmAzM*V`4A*2v0maGNxRfVb3&>A?z{j>(gnP9C0GMM5p8x;= delta 198 zcmdl`w!eIX43}q4YFbHrQD%BZiGr;{UV&a>abi(XVx>m3MsaFRnjT0xIx|*B0n7ud z19CML((;QGG8HoO6p9k_(o;1IbQC6k)EBnRL+H}f1nNpDsVqpfjn+`lQHL4<7SYsE z&`>Wz6^hlIY{R9Znpl)xoL^L;fpC|OLVi(7YLP9(5TG_q&B=~@Vw+pJRvQ2S+tWgL diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 0c990e8fc2435..98c21fad1f8c2 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -791,7 +791,7 @@ def test_is_overlapping(self, start, shift, na_value, closed): result = index.is_overlapping assert result is expected - # intervales with duplicate left values + # intervals with duplicate left values a = [10, 15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85] b = [15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90] index = IntervalIndex.from_arrays(a, b, closed="right") From 7c08ff1c52a85f8f9d3d0d3c4f474b0e5aaee4fa Mon Sep 17 00:00:00 2001 From: joelchen Date: Thu, 1 Dec 2022 16:09:30 -0500 Subject: [PATCH 6/6] remove an unintentionally committed file --- pandas/_libs/intervaltree.pxi.pdf | Bin 15152 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 pandas/_libs/intervaltree.pxi.pdf diff --git a/pandas/_libs/intervaltree.pxi.pdf b/pandas/_libs/intervaltree.pxi.pdf deleted file mode 100644 index 0d7c96a6f2f2b31f4e042ecccd02ae78002605d9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 15152 zcmeHOZExGymHzHuaS@}CjLJ$f?e>G$4w_BU*66G=JIQvi@CF1eUD@1Fq)Jk8Jcjz) z_j%5_Zxkgv$)?{{z#uev@44sw_0E$gPY&MO<*Lj(YZi55imJ2Cr@ZVMYYz^7dHM6j z`Nf}4%ZQD%7S+KM z9CILVR(Ung+i6x7SM4+}uj|$n%T?WUrf9QeUDaJ(6|;kbS$DUxbGtB$bqi=fD{E(Y znKxN?a$xYon?L+M>*RS+*17(*y>C6-$X8#mb8uQXUykenJ~%iu$KT}#K<7>Z?sB}vZ||#d}c)=)G^{Fgf=#x*Hw9^NAoq*u4(c+Q`$v0F->uOqq@Uy z{_u->ZcS6y@k@R6KhSV9r#1?7E^oUAoUFlu@XD>rpv3hj+m!ig1&k*q<8fIF{k9GM zl4ISUVee0OS=$!B+N?8Km1nbhU3CZYl_JZsqAI#98@IMxOj7hG4vDjofF1E;zQ_pb z?D?~2PL@R2vxd(<~_seV5g|Se`xq5Ec^TkD$@Ou9d6i z3Ixns@g_E(-2|61H80zCy@YafwVBmbmlu_(>K>?z{Gy$PH_Buna6ckKpfHd+x~jVy zzNjoT&o-mY#MG;Fm|LeD?wQB*;K4__ zBd6xIUF7SsYe|s9EFvF-0sL|B9`u4ZfJEL2TbjCT&FzgM=KUJx(%ry8ue|uVlnh%!UCiItwz=z(xv~vF zEc0q+BT1)%pA;2BPXX^Ck`xsh z=_#WV^PBdG`HegDNDYKgC^&OQ9nD)Y@iE^V1!}}N`U~V$TtUm4iizv8#^^s*zTzyx+}gGeon0FP4F$|@2JtJIOeRl{l2kxVt@OMGd{ z&2ij2F>?OV<+BfJUJO9jwgbaaNm&hmh-9v~zC8J0poUoRP*8I0XWqQ3+k$B~?68Kd zw5u9%ki}YU)>VPTG8hQbWTv>=ri#?|Mk3pF@d-JvR%tX5!N?(#I4KGt@aWLIfBX9F ziMd7nX;ZR5o^x+uEepfGfRf6qF6W4dRai{h8T;VG9hFl=2NsR;!1;_+0xnMmr#n7=zC}qn0QSPeGQTy?cfgZ^t(E8ybPP#1_xfKHa*x)q#yJM}v8yw!Ti{e% zDLR^^!1ws9J} zLe*;LyR(RYrfp8lvOt5Sx{jKtoZoAtZvEcxQNI6g#Q#SlrpY*VPo13?-(*bA?@jkw z(|H&cJ^-YqV@p#^3c6u5D1I0=m?9;2B#iyn_o;r1o1X5xK$FNNi1aoz;xrm2I4P(lP-g=UG8+5<6*j1LPvb*bGk#s z^W_ThTH^Eh-nJ{5GefVd$(GvmjOklH_t#jUedsR#Xve-I+tzFJswSaB>k8@Ei++#Q zNrn*pigd$r^Zd2nE3EQanv=;-cQ4ua%?Jf~k)Gwwn3!jfvEn2tlV)cbf*SK9_FTq! zS!Virf~Z&e_wc*xE5reZrdth{A++sUD9DBMFD&!09XMGf`vK$io|ywSGs0 zi)NxboDTx2chom9r%$#ra#egn_d?kw&zLVfz{Fi3#_;Rp%trcs*zVBX=$u4eD3u#u z6me)Y6ob$wewL%5Rvm1Vhxd0x{Jd&c%v5EFgiZrz2-1mx0(!ZkS-4o2(p;b0tMxTz zY1d}8Zs1pwX+jG1i&b(<%fgXE1zJL~Y!J&1<$^HFIj^XD#Jcoq5EV@cObRQmqsJ70 ziGuB3sO^LCa$?3M+WQQ8@Hm(_Ru;t!d01c&@hhIGr6_mM$bmI}!ecr&VdXY277NTt zVeUFYz*2y64JUQJ2M71}E0k^r3hc~WemIb=G*%XVmm@dA=0r+Q?r{f`8nDuq~%gU zhECa{_)G+`B#T&B7M4Re6vtzLYkYP*7R21lXWA8AX~&6dX$J8Z z^hG9hkFNGw732{i@hM0rs?u`qBABQ2Dc@r z9NnYAM^Oqc5WyiAhD%IVxwf8`3(^>51l4Na$C5p{)mJk`2WaHD8_l9Ak_0#hZ?nld zwljy(1rn9ZE$%*s&ycCQ4q&hVaEy81Hk^lM-Lk|%if|4CXGJy1W(^nYKFNTTW7HMm`|*0EHy&sOEiNfB?Fwfdx^KvxP?8-} zv)ZXS=a9i=QfPW&LdJU&D8r=b%8n>TXZb2RyhGGTv?rLD>K7V{)u>}56*d^VUV$Pd`4kG4EF30b+t6b-i)u9h`YbXVSj*=mFUPI& zV+}a?`VMO%XNFuvvX_@LUQ)w_HKT^O&8MRQVWu%4tm2K9-T91 zci$Bfa$&Mq+{JOyupLKG4vcFU_K=a2=tVrt%HC$84idG57`Huo4-etdf4$QU1ofFH z>=63puzv~)-hdKT?&8C)Ff8b$AEH(M_dwIwb8_WjuR*8$4~&RH+`$U>Q90t{eu{;D z`4%)}XL1ebK|B)Iva9`imgKj>%v*AWeOG*IuI89jFC2x1kr^9mZLH0g)~Er7D|=PCULlZHgUVB z%h#7%(-*0Q1p#%~dV*~7r5g_5-#C8=Juqc2`0dUjaoS>3vNy2ExA_A4afmbYq7jRS zR&Ade*E%y4OWN|@VnqQfBA@=gB`J6NAKWSg4e9hp`@Jsj{*n}Z>(s%{-mrs<=>&KW znK=)5k|a1Ww@rkRqoFnu(ZMF9GNeyzo6U> zCy_?Q)1ea2;-$}!>V6Lw?L9avJlA&|y73h|zF93vVAFyi>Smg}I`)qfeT#wwX3`39Y z6pnSaV{`ydNo++V*H+~h|J&`geRi6b6MZjrXx{QVA+mMcc|whLiJjeSddo%DVn&FrX%{HQ92N zHoI52Np_9By1l_r{Zoz(y&N1>(lJR{ar$7GbTzy$3$%SiR=SL|CnPpm4m?*p16JfG-f@uGG(Ip8eeM%HKZILD20@0WL=&K#cls b!r+TeK{lJw2Uz1*rvw}>02df1y8HhNh9G^?