From cf40fb38dda3df7496f8e66e67fa61839828ec5d Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Mon, 12 Jul 2021 21:37:01 +0530 Subject: [PATCH 01/15] BUG: read excel changes dtype param --- pandas/io/excel/_base.py | 32 ++++-------------- .../io/data/excel/test_common_headers.ods | Bin 0 -> 2941 bytes .../io/data/excel/test_common_headers.xls | Bin 0 -> 26112 bytes .../io/data/excel/test_common_headers.xlsm | Bin 0 -> 8696 bytes .../io/data/excel/test_common_headers.xlsx | Bin 0 -> 8646 bytes pandas/tests/io/excel/test_readers.py | 7 ++++ 6 files changed, 13 insertions(+), 26 deletions(-) create mode 100644 pandas/tests/io/data/excel/test_common_headers.ods create mode 100644 pandas/tests/io/data/excel/test_common_headers.xls create mode 100644 pandas/tests/io/data/excel/test_common_headers.xlsm create mode 100644 pandas/tests/io/data/excel/test_common_headers.xlsx diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 4d6a766ad6cfa..6c9f61e77063b 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -358,6 +358,10 @@ def read_excel( mangle_dupe_cols=True, storage_options: StorageOptions = None, ): + kwargs = locals().copy() + for each in kwargs: + if isinstance(locals()[each], dict): + kwargs[each] = locals()[each].copy() should_close = False if not isinstance(io, ExcelFile): @@ -369,32 +373,9 @@ def read_excel( "an ExcelFile - ExcelFile already has the engine set" ) + del kwargs["io"], kwargs["engine"], kwargs["storage_options"] try: - data = io.parse( - sheet_name=sheet_name, - header=header, - names=names, - index_col=index_col, - usecols=usecols, - squeeze=squeeze, - dtype=dtype, - converters=converters, - true_values=true_values, - false_values=false_values, - skiprows=skiprows, - nrows=nrows, - na_values=na_values, - keep_default_na=keep_default_na, - na_filter=na_filter, - verbose=verbose, - parse_dates=parse_dates, - date_parser=date_parser, - thousands=thousands, - comment=comment, - skipfooter=skipfooter, - 
convert_float=convert_float, - mangle_dupe_cols=mangle_dupe_cols, - ) + data = io.parse(**kwargs) finally: # make sure to close opened file handles if should_close: @@ -498,7 +479,6 @@ def parse( mangle_dupe_cols=True, **kwds, ): - if convert_float is None: convert_float = True else: diff --git a/pandas/tests/io/data/excel/test_common_headers.ods b/pandas/tests/io/data/excel/test_common_headers.ods new file mode 100644 index 0000000000000000000000000000000000000000..03595c18e8aead8b6e401eb38caf6028b42c4dd2 GIT binary patch literal 2941 zcmZ{m2Q*yU8plT$ErVeaC4$H$f*})Ei54YF$mn%M8{H^l)V!-kL?>DhQ3k;v$f%Ks zE^2hc5WR;GE!rD->pkn`uDj3L>zuRC`hR-D@ zCQcUz7lfy;8^Y4f&Dp`)($m2eDdL5+5q7miIiQ4H-4I9{S8KEj0_iD?a`Qk~+Mw(a z2u~O>CI~?JD*`v*C3WmoBpCqkh`7!9PAE@bX9P;v$HlqDpc6R(0XBe_<*4cW68R(r z5~#I3?7vt)+MwzO3Eat64Z>IzJf5D(=hMhGMTO6dFZHBnZ(i$d=*TP-H|wO-uV+H& zF%5f+-(B7><2t1O9MxLUdS4Bn1$OLjK3RI?9U-b^Pq-zvr-z>$11o4!&R`0KWT_$7 za99@c$swkFPz%O@EUB}={K}{-8W`GdO0edp4bbTGDz2`CFYdWGqxaf0HVx(m4eab_ z`9%mX@vBe7dF*ceQN;Cj0k9|5ZR~b9l$DE9go6qO9lYx%1qdv|8yhY&IDUpAz83_D z<4lQ(4oran)70K5;u-&3E(dGCHL;o=Q*!OlVexG=kdrK#)~YrboyBFjpvYk@9b4Ix*t@w{*-?%v$Gy&Nv^k`3?ID6j7%}sot$P7od)qJ$9Ql7 znJDDfwisFMlh*An>6b=T9F#1HEe#CBtiE61tfuq=*A5Bp0PmpZ2TPfr6wt*s)zNzk ziE)^GpGarEr8H1u8D>#CF;96o5IpMq>N4AisVRf^^<{v2*CkfT#*t6|`jo3L15D0} zD*PGPsar72Z<_iJCNET64ka6Dlo{202oS8JqtEcnFVK;f53yrJL&vaTeCYdj*o3M}-I) z+vF!lY=M&M*KiGs1oz|zY4v|HS22v39E}<_g`~fc38dy2nu%v}67y(blbvO@T7*mL zOg3p6X$yW%)d3!&4f_Y;^Bqe+ly*((ok;V`C%#&C4=j9=ppm`@1^~{^$N>LOrOD3XQXP>>YeoQomPn4*hG#lh*86USwD`&^WM!0%|y58&wO_g@)8gS@#4mxHBmnwsQFNo zRrcvcsM9(}EV)RgLGNEP_uB5c#q~W#IA_WI)t70QC zEGaZMVV9{DYhu+XrBoSinyN}xqHwKGH9LR6g0wlGq-Z<_a^sQ-Y~V*56CS-SkL79B zQrqH*6@98(3Jr}kxS1i+ITP;Z*Tv` zRoE6gsXB$8H@arnKQ5VJQmWCLD)z_NSMKhCFGE0jP8jXvrSGRJ+$P`q+hiFlPthpL z-VuRaI?P(U;kug#2Nh3}4A6i1tyG&M7Ou3%#K!AsvD_@fUr!HTn3H}om$^M-yeV*+ zDL?G5+)_%W4vcs`TPc=Bv#9S=eA}^*U#sna9389>e+s+ViZ0h8#QGgi2Qsr~`}v1I zS6x@fJT{W>0n@Gv_omdp=FQj#Xx+Bk-2*bmk0q&oeCzftfIs8t_w_TJTiXPqY`v{v 
z?!>bKIj%mh?Way!q-9hAe=vld4#P29>>V}>3$%pwQ}(jnnJ^By)@kuA$E5(^_%V^- zza#$a^X2bjM6^Mq000Z|uBoi2Af%?HBI06+bg)IBe!-57Q*a~`rBmL2I`zp%&Z|=J zX6CwI@E)LP1WIWt&a2rmDXV3f;H9maEv?47`7hr9i4?4s1gdC$xA(pMQ;_4sA1k}< zM-y8`EVQstvr7%;@w=8!{=$Q^pxluYG{b5w)gx<&n}$Km(!s&&H)`;4R`ePQO#7&5 zIAM-cIxkA`ak?Sy>g2omd(gPtQG@Ar3hQ8nk&zt*CrOI9zhY#xp^r(ry~}3Ad6i^& zBC2B9jYp!K{Z=0j1>2R6F3HI&@~u2Ly@ShY3{mwznk=~@Lj2Lcr{a1VEnJNR07xUI z;+L;Mcv_wxbx}I?$mZ)Hg5WL}us+)TnoHaUoqoMOJb{4#{Gg^SsuZ~PW78>wQ_>jE z_|2D)YwFw=eSg~G6|}Ae+5;gKdMLD8-f~3H*ba~1F&j0zO3!yn@~nFJ1a8SdB@j5Q z@{ZpAHT0|GYU?{|cfHvWN86egF%DCK*5(itsf_j*M^7;N_$ubK>)KlsQb_%H<>Wb| zhhvF)v`m5t5C*|`hUO=J6<+IXz!a@a9<}xcIJj^sFE`QT!6ys0` zS(qAI)u(aFkUXe{F=nqp6;t|_DgnxwHB2S5A6!pll3({bYl10i86#(PG(>9EL!Ua) zf@J!qf;eld$8uQLFHg}io{)a%OCFk3OtO>ItauhWzzMHF^ct+iepzReFoNIN=7g8u zvrgQLaxjT2A2OQ$k@k%zLudDYl}AD<^vm86^AGsDkvw0$82`1EFpYE7KU>8Mg&y(o z!Z`k}`@NZ6=vL2L+(m2qUHCg97s4K@e*ot95Wkn!MTjwCr4jA(uR?=qP*9$Kp&(w@ LiG}#!m*@UB{N5Ru literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_common_headers.xls b/pandas/tests/io/data/excel/test_common_headers.xls new file mode 100644 index 0000000000000000000000000000000000000000..c02dc69aac93b80661425d773dbbf4bbef12d1ce GIT binary patch literal 26112 zcmeHQ30zIv_uuz+s~e?3BB9%;G|#5o6hbBQ@C>Qk2Bk=mAH90XkTIE+=ix z{Ri6*cdjRlqaRTtUxZpjLxRqNYZp;okHCGQ5VN9RXSfEE>iA!z0WB_cq=q{ACisfx zIl?9yTte#M_Yenjggk^aiOeJ&2nmV~Pl%r#ACbrlj);g37x2c+79{ZgLx(RVLnsSn zI17g74Oc9Z3sik#Oi5-yo!e9AgH*Xw)Y+RDkafM@v1WSM`}Ty|1nP}v`%vXpQfG7O z+z!qxa-6Djo1(aoZsawT=aMWmbzrT@@*|-nmPC^_Nu;E%wXdNq z?q4ZEpoDTuU=gjaVzl`R2Ag#JN(mNNq_Unvt!pX7=*Dp)>cE3#PKtTv#DdsEvLMbR zgxHckFt}|9^Ufr=(~aW|&cBPIY7@ywA|TVqWF-K6qDi{S0yqM1W!&levE_BI zluIHB7EO8(SHcPrx3~x4$>>A+E^4P)V<=M}(3?nOSl^sDK%#YtcYLVpZX8D_YfDIo zO%I-hy@iEyh;1Ldo@7JiCeb-_2Xl(<6JpzqGY@*Ew_@=m)g~>1w2;j$mM4?UnKSeVhGkp$a^7=`lU~2c`7<;;@__a7MPs)VAJ^OK%n%0 zwCN6K8>N3)`auQsOa=5U3g|fs=zA5=_bH$&l4pa0@&yX$itrVcKdb=fKP^u&#TVvM zjQ%>UfPPE?{kQ`95e0O*y|R31dD_azv!BAz717t}>u+qku`S`y{h}|XM?{=4J!48Z 
zv^?tILnu4^6ADm=$Ubcq(Crn_d&s4?2TTh8T@!rM_-Z4fi|wDbLroFAh75;GQ_*;Y zDK3pe(~U%Qt!6rpkhD%RbOv7?QF*a`X#3F<(QC+5xa=&#(H7Cgax&~QS6|<#tr6&% z9(V0T%l-omuS}lt|3`~3KAdvNZi;->r%K5 zBt?pj1=vIf{J$Dpu^@?_5ba>T!IrInMOsyXo~*Wl0}9eA4xH9jK$qGo4%jEHs(?n@ zstVXCt*U@#-l_^XzFJiQ9ZIVz;K*oI1sr;$~c zLhsgA=p$7D$DgDVV5M;TGrR__AE}AG2D1X2+vHAgV2WnmvLLQ40TJ?*gSfQ>#MMn2 zgy{s`0i@YVWSS!@y0e%odN=ULLP>?OInaK*QU3c?q2l)2NP*-M4f!C#i^@Q}q(R0h zkVM#!D+W# zeVSs9Mo$om9*wAd8KFeoB1~h6H6^R=9^#gv{o^6mr1SFf#2_LY`1h%sIWWCJ40>Q< z%b;JRZ9ujaxKv2@54J*PIwKmGLSdy$D+&t>WsC-COrt>>(`b+e{HQ~A%H#y}=2oB` zNU1i3JBBeH`$7ed*uBcM0`-o~lXPm5E>|=jW&M^p)N1zIkdf_VCa^3BPK=vDWTZnK zs}Vtjq;#?%FkSyTNM)dMkS;9&VLE^2=&_IjfsA~e;G9vhFUMf21v*g<(y=8Vm4V7Z zy0!$w)lE5wr4$HcX))Ej`H{~E!QXr6#uM?d4 zE7l1Hzb(*-auDN|fK&!52Z8wU>yo&-DF?BZ0)dQtonUaISSJ|Rwm>J!K_EQ;I!I-p zau8liKwRCFgLIbyfsA~eVEm?7C%DMd0-Y!aF=+`%WuS5pvzCCkx=DhpWg8GZczh98 zXDglquYY*BATH4*n;S&3QFToRi_=Z011tH?#ES4>`JG4llW_PaAaT^&E>r-EXcJA2 zAfm4Rtxz4W@o59I04rRZKsNeJHmEqyEkz;Aym9UYtw93Fcckak%W6WeD$I|SSu#vR1%0S`M&GOlF zX0nlEX--PmNLpHDVCk(_^4XX$*~qapHzjN&Ev+)J{J~NAY`P#DPvw@z12uryb< zReKclO;;uxIhJOrgpH)7xw^eOlp()0-I#3TSelIzHjQ+%vA)k#YlZ_lpvsc1K z($ZYrgmn?}+3=WbACz!3Ju9{04Y(K-EQ$$g#sHPHB0^rKxX1^!Yz77U zVS-vQKqYO45L`>3p6X0Vt(rl>T9}}=3{Xi6fhM0N@JhJT8L_Fy^An z=JKRPQCEYe`+-Vkz`sxsfDkNepQZt2Z5U-WV6SC!Sss}PPlCzfIu2@)OQ7by;bfk;hGUv0)No|f&=hLR)R5-YfjGeooD58!85V53&CiBvQw!!j@e|$U`Y9DD_ zR%aoH_=D2o8K#OW;vy_yCcP^mG~yO;HmhqaEQ|b~^|%B*4MaXJH2`M3t`<~)M@x~;J-Qi^B)~UnKHAxWugiiqj@uAU_!ly1^2>m%&acpn1=x{wlo233LOg_(NO*r zUY3a3n+tDLnhYLHDviK!bd#2qMS$fnu=E7VK_FC<_E~TV-dQ|edg6E)7N01{=7vdQ zu-m|RL5xHYia`5Bo);IYMJ^2&nVTIgofEkn}H5?G!q=rNkip=_wRgyi;NPcoH-uTd63t7hUW zU$Bi)Q364t%NA}YvPFU?ZMgLNlbx`&e5b@cthWLTnFSBGb1W_-LafTzBR0breOAD$K)BY z`_^0@R_3fXuyOFGg-;xY<`(*Jt)8b}Ty=5Bg0nwwwlu2vrEvF{zZ(OqNBidUjAHHU z+#0{{^C+RzRP$Kbrrg~_55D!avurT&pPYB!E4Xg|*v#)bcj_COr@QH0$>~!zUQ5!3 zWNsOmH~;C($iPDHRapt9SwB|))subMV~%y&=d^ZLl1BX6sUdSs&qo!0x1OjK9qKyX z`_+?=n=Bt@9WCK|4%;`>kGt^5+3cDZ`5y;W82LJ0D(?Q?@khJN?N=t8AN|qz(6cE5 
zkLw=!_jqsHt}rM&q@5XZEs|&K@bpZd?=xTAxj7;B)ZJBus}^`psT_7rXgKvf|5^JR zm8la`K_fz#KuYT?@8<5`&Vrsip|fzuM@WJocD9ovo-kV|5)&-v$dBAtt^$NY} zp#$%@hgqiXUNbA?mg%PNjURXh-gs{`@|M@s1FQ2Zgh$GpE58pumwM3t$v{q=*Sf&y z$6?#^ogR%U&B_<-t?5>E?8L{LpP?zT4UL(%!Dt~ga2X_gag-P(m?3bII0wO0DN>Mt z7NT7l)TWH5=To1ueQs8rSNKVrfnkN051jl!{Bk?@zxYG-wmY|;JX+{=^}b<(Wr8b?OOL-^V87cW|MuW6KiS9DU+nPod2*+Uqbu9rAM*3;5h)h6S&Q}`yUV+I z%&yk#cl*0jiY=#mFZUn!>_(&Q!+{NU)@|55aLTtJTE-3bk z2?6mYUV5iHK1&_{qV({z27Ti-Y7HH_AC9#cIr8z~AvKN;uM+x?oResmJ<3VF&5r|} z)c?`2-KO&otj_PMO~ZNpbG|HXS6uCAJK~MnpfyHC#4jMl?Nd*;K5l*H|Mu$0(tmW{ z)b{+tl8{qlUVC@Zty_Ab|J~GIe_t@?PGiP^H{20Mk4pGu=g*w{<9YYFRYlsfGQ6`^ z|Kz#%@9pa+|^HcJw?@zFaA9Z3y-te}5m*x-kD6({)<8Ip{V^gN}lY3e_l1x7sAHLD=TkDk7 z=4SOvNp+p+%b~l1)v9K4y9X8j(z_t%N_Cf*wCaQH?(Ux+QxH|J^Le6O?%d1&46paFsSWw& z+}z$lhjuKw^@Cx^lye(Rjt#ne^I(75(}n2=jpj_u+nX1%(0Ay=iZ&&~55#(O+_b^Z464qf9LPzIns84igM{^X5X<+5tB|YyME{#f!gx zs@ME4f4%q^AL%qD*IOt!eSEB-;^p?9&QIu5?95>o0e& zg`EET^J?8aOWOaoEBWu21#=IElzx6yz)$_h^QzOAub<6m9FnnUZd!1uzkRw|?>`^0 z&g2f@54e!+WA zhqcOA-SD=$P#CjOyI1>I`_kPPVt5bThiuc{=W}V%hryh0Hy3l$3monwZcEk~GD${)9rGCruqhF_3`gN?hw?a2)(!{~96Vq(}xNTvd5%4st zY@W_;eunm=IrBGOH8-iS+I%AMl|^~rwBUve_S_MVyShA2pSw9_ zNq)?0wPm+T{?$*H*8r*y_#-8@wH`OGl3F864vp3|_MUD{NIavHzc z?{8K(ePfBytK_Y-E~j=ZzWCsD-=e5V4^DH87epHmezB`kbH)w-n%g|xP5sg}%GSsA zzmSmXxZ|IDjdFTFDgO|ZbCT8BqTqh!;01Ai9I5%RPPnz`;w-O8!DWxe#^&t{DC*rX z^3uJL3y-vGG>whznQ?G$ zd1l*?o|avztrhN;b78~60ZE7RSNs@RGSyvE zE2S_dbj0jKC-&DkO*(d-oiCX3oqwEex0;4;i#g^zPFc^WYyOvl_Y`!zS9|r;u;7Dk zyE?F{!ko-`jfp|CCeQDiRN`La_#k?DhJKu<)(qpj4>I+-W&C!1&sZnlM}?;kJaIhs z%y!hX&%C1voonxyhRqrivhdf0xE~I0wvBqR_2%bGV~u?ymS&gm7iYRGSZT1d>-O(U z^mHB^yFC2uyTxaQMhrQT0AEy5eGZ-*vn0tn46o{?5R4!4|vRGt=J`EL?AylYVl3{j(aAD%;cy-i1rG zJ6s9=N}k`D<(YfNAqSIds@H@3u$Cz~`NY-qH{9bw6km;Tl}udwpc$yD<@J zkEVYc;hZ0uJ{>r&bZug!OU#mi#^Vd0 z)Gj-9ZhC0g&(RT!f=4wbT-xuDaChXktT9^7$q{`2+ESnX&iPMrCe#={s&(I&@L@si zfv(HdzV~}+KJfg{f3071+v=pi<9u;f`zd?gg2HOfS0`T|H$9+X+|#EMXL$Bp{P=ph z2ta9@i$8J)>z+NTD8AME#t9=jwj!8#EX|Y2kg0yOF~a?jy}0@YeoBT 
z*LyBedpmP`KVLs<%efgRUaayNX*4XeuFteXQ`1+)d%f%sxhB9S$l7wGjibw^gm&GB zuFb7lHu_`Qfu1{8k9};ub65YLPgV_XOiKCTO%7|pPnM-&dh=_E@rL1BghxL2?3@|t zWNI||%ZUE7>lUfo>?p5%V3D&okaKBbNy&x#;-<6M4zQ6%(X1AVNgQ(KxWfaa0hrUg z=GxAnPl-t%vm$fa)LhXR=97}0z*IjIi8+U9M!G2viJH+cF_=csj8O-Hhom%0Ok#L| zI&SrVcu$>7hSt|$+(L37(8-YqE#L-O0kjnoG%QC2In?*IdQ#upwq+aQZR&tHpDc!5 zT&G|In+^o07|ydHRMVxd_2EoleU3{$?Rirj6gHe6LeYCrbUW&tPEpdRpoYe@f*_g@ zAJ%tp=Ntq)1i$7b3#rl>)Y%$JM^ML8es_s$8!^f=zOBUdc6iUA28b+>j|Cwe22*a- zaf2`V)jg97E0(zo9nccd=f1d@MIB>63eXO$iQ$3L(q}Fkw~XPn4=NSnLtTP2%rI%F z872)D_oQf_sXic}O+4rvODe~P9HP_r)F{0ZQVPK)QnB7^K7D3UWmq7&F0Q&a<*)|I zk2O#tijxn0rfaaN8n8e?)nG$iU0iCRY9J1lPvb;2<=Mx!M27X%07MIE*^-r94v0RR4=rFuQEYqWQ`Fw3Z$kM$?ZrYC5nOR;X5F; z0kqvBn(Fw!r-3i9;?b6awk>@i<-wwDZ$Ab{=@qQu61(n4>YRaA2?;F`@B2VPTOSMw zEhh?+0i>mna8YI-BzW&lPC&v1l?#w4{{;_W+*pS@^yL0g0id_!KE~qB^@BpgqvI0e zXGie{CWi}R;hqm9@{6BV_?%lT>iW~3beFleSZMaDL!|+g22>hQX+Wg`l?GHAP-#G= z0hI<+8c=CKrGfuX8c-Jhvum%^<~o|_t=$Omzx~Gp82@7o-?UK!wvEs$Oyl7fzd)P@ z2{!^@?2YSoxEBoL{soZm)pG_UTn|_QNgL8ONRsOT9LVEA$LD$!7UmHX59V(-V1nxq zxE?`oMDh>R<(*VZL7%*eZjqx|`OTjH97*pdreT?;S-x3DX7_(#ij=OYlX9>pm_~(o8 zoj=C;qMbz;>*IO=zTd~cID>y-1^=Q5zOTnWGK9goDJ1-VBh28}9FhejOGs9btRcY` z0qVXj{Puuk2gx3i10?v?1HMwE?|%ji#8Dmpng&L~XFvEQ6AwP-#J`?`A8@wjFe2*I zJ|X0ecEsc2nUJ&8nGB+Tq|c)NF9fOMfO`UbsuxDd5hGo&1v%JdCFYMZw4e%A#HJrz zQk{&VK7zt8bg}LD9b*I~6H=N#X;y@51N?Ce;rMAOe;$J2;DJM(3;=HU3H1#4Bnx|> Qb@KH57us)wjGOrX7cA+ymH+?% literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_common_headers.xlsm b/pandas/tests/io/data/excel/test_common_headers.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..9e5187bda8dc156a764881b231219ec58d6b74c9 GIT binary patch literal 8696 zcmeHM1y@|j)@_1oa0woq#vOtM2n2U`4Q}0Na1xy0!6mr6Yj6wh?!nz9;p>dN$z(=V-d(Nt=yLauXeH5gjVXy&k00aO4Kn@@;t#`A70s!1$001li!V_&#J6o`+ zEm&X0&E6EG%j9ZfO_mAsgf;{41oHgtsQHb^ZE5bdxb;sZtn$^YF<4 zCcUEI=NcMWm9|X>>AP#0@jyTv?xgKm<#yV>Ln(Y#Pm(n(hSf3B^tQl9UuC@+TfOq@ 
zU69uZo6NX{MYF$O3Dlgqt0!kdDYSw36dJNx4mQavpzOY%^wwBUSS|da<2Z<2q6syO67qAdxa^EOn`wU6&5cJFwa}zjL}!w zlpDT?ZxwaUV4ds}@hx$A$=jUiE-g{6DAtzO(p+N3U4apyiPy+pI@wMZtbyXbYZ_3L zwmBhQ2b*62;r;W6L?q-+akP!vk5BuK4@W#Tr;)1FrKM%J@t?n53fAmghy#z%cu>x3 zqAy)~Ag6voB+IRVYCf^#&dlqJ3(Hth%0|&y{H%UxENI?DJqDj$EOz6K=WJ+RbS^A{ zu8exmLv%NHB{ncOXA{WP|Lg*)c1P~R8Vv+ukB`s*g}?E%MwNx)0`iXxgoelvp6WZA zT7#IGe%$}Z&;Mdl{xtOBXj%DA7L!bM}D==Q5=ei{eq&_r6eH!&cO+up4#!FxI^(;7l!lH z<HZZ8@rhHh3iJ^s^=F7!IYa@t0*Ss_J+hk1MmMEUQzFWT zB?0Ai+!=c@Bgx(~A9Ht6g@Slx4#pF4`ani+=1M*KtjRBLpQ$RF^IDV}r8)9Fchfhq z{(LT$+=lhdgHbM}Pw6=a<_+73L@!mwrMFf!$LV0ITNgKCXHnltzh5Y6{xU?e{z(#F z!Pxk7$c!{XNb(GT0Oe}U{5wxv>>RC(?Ch+5$l0Hqfr4lnB+GyIRjec{-Nl05g8V&@ z#U4 z6UH6O`uGK`-O%|EG=kA#35lNsEEeX$-hR;`GFlP{t3fcjk0A}4=j#nE6+vKD?sKt= zhVcLn%V8ELHcQYb@c;xfgVmcpckh%BO8lzp>d2Dl%(fmIL>I2$(*4{>$Ib~5CHYa^ z0Ao9oZo^jXS$8N~L`Bjhg`5H&0bsJgJ_%B9x@gc=W@3}-@fuj< zet@oR;f!JM;@E7nzjtZ{@}~a@nMi1LF%)P3;1~`7cnWy}0-2w|Qmm?Lm&}6gnO^AZBsa3>X|}h?}zjG3m2;D)MKj^=6FOi`0%-(P^qWKQOkDHd1x&| z@mp#fys6oqH>EZe^f&}%T*mh^LmdQjAIGZ)%*pAwYDp6+nW^>LQjXs(mcEmg4_psH~ROOR-SO9=JI!4-UrXa^xtO)gty;EdQi>@aP#t4MsNBT_hE$c z`x{pPG8DvCCT8_sw(7c6U;CWE{NiX5WnRl zPj;U^c~U$}OMFb=L`wOrVpblPzZ@{=$wLwgMcIE=@icPGL$|iH};TW7NbM zOE3pd89E+qR?_q7++tzoE@^q-7U`z6G8Puh%Q>u->lHYJo5B3}ot%%j7xDcruO~QL z0<(QNDn(_P$$?ob5fcQb{&Sz59$-m$+dIa$E_~qaI(te%c$kCYg{lfLO1E% z>aeH~nhyRU-v;pr7!3$#0#3Q_xuJ&dNqA>Q7#m;+L4mP1dUUt--j~-rPOsN{<#y2wqAhw6S~4beONC3UN(zBNR-iSb`fgdPOk_`jMD|U#PTi|J8#+cMTEGMt zSz`85q{^@6u4!Bsd45Cq>HQISC!lp>7~XdyLS%&C7J4N3TURoCryE;3EVaXF4*ncC=34+E&4rHBGf)EGw zcjAD+Zq}xtA8Ty3s*TkG3;K7#S#R{aqXFj)2E^LI@kxc9o($rHBBR0Je7!KrH^V*O z@8z8Ji1=#9yn^MnCqN@hAf@qSIizai9>)?kBHj1+tg3q9QeH6GlwYQUC$!izcBoE_k6a1KKxT5)HlTa(d4XZ1^M`BZ= zb<#z}0{O`pOm2n9qLa335v3Rj_imDqkFEr8#Vd6d?P9sWj6RllvgW~!=9Wg2iN?{( z_s=6XbH>zWj*#qwORe-Om&!(DweD^4;E}f~NqLoH z8wbtmn{U^?uDN?$c#_@>3K~$phd#^)$cIU$ebh4dCwdK^8rm$O*dGw2O@A|Qe_UL! 
zX(UvC4{DhY2A&eRpGdVuIdA^%daH;mrEAMI+Q)%OC4+Qw&h<2yzR zE10yqe+RIO^>tS(8HxISF(L~n4_icx&goHgSu}zAP+9}Z@Jdrlv8~SRh-dRjL7~kv z5#hhveN@voaN$%cVaRSOV?(*_<6bitA%_QjCgdXV4&yRtBfxdKq!J|iV6Go2m?NQg zjN*MYqxyN1h-|cRtHa?EzQae;X&d*Q9@KW3-b#ux?v7ZuRYgCbKEK2 z9`cICjHt}1vTtiQvdA4XlGI2Q8P(p6(HoQ9_6p+gU z=SBq8^6H3n%o3$ZYAjJr?SY~iJ6m)!A=VQr-b*Fg?3{QJJAv47VQAEriwH!@>Rr+? znxR>Wk}b+)Ybh;f)KH1}GdxHyqLBj& zsJ?Dh194%*`uYz;NFVAc)LmF9h)Fuu z2Qa;_v2I;{UQGnr=if61C_+Nm)kYIro-U*b$o`z~_k)rQ~{lH;Viy zLS=m@E$?dA0@VZprd$iPB**oRlouoPK z=xTeo9gJ^#JmtGAUth#wZuLAn+l|3n@x0uYk0733ARSVmY4GI4K22sR=DZ=eDR?x>>WE3I7;wGKo}s?3G#6fO`V}<p8A#~?=#tiSIzw!lMK6}}QgtCEl zPomq!6)o+!Gu{x2&mJYD`i=MgdYLQFuENTHH-u@14e zJ$xcMf0WpG#z{@Xf1 zkjAJX``~uNnEG>rIN}4TVqo8}>Sc-nlxcLK)ij-BJOa@4`ZU(2{EeN1pDQkHFvd#u z?K80@DP5ke6czW>4`{NhoKGN|g3t-N8Q2|`=%zNw{cr@$H}UScM;^NNKrg$hUIYDQ zfx}i90uM_9va7nQA#_D6KYqgi1SAx^CBGn??>b_sMOxNJvB|T^Tu-5~#noD{Vm8CO zM}jM99$W@UT%bU3uo2p)_pyA*w5Ho)zEz0VqzqQ>=4p$c?W2o9({8FDy{VhW`$Dq` zJ;5^5VXMS*t)9!P%a@jk%^to3)W>H#Y4Xh#TkPbqM}x54O(HIyFwJ9O@GSWYij6c4 zmQcgbZ<06@7#3qG7&jcKg=pC3BSulQL`|1Ir-aB{s<^UKXBcM(IstLgM@#PI&RrwI zC*VIaOsyZc_1mi{9ID|r+p^=ghJ@67WpM^ z(y0LuoW<8Sx!v@8li--$z!#gSMQ5UumGn8b-{cq_ORbn1&@iXmq?*?zJwu_}Ss2s5 zu*qsR%XNlSRVgl1voj!|$WLIKy8z=0G z7d^g;RI=o?Dx~IBJlRa0RYVtE4A@sPcybB1lTw+>LMt#t7v?f3+TH<3^-#rY3gdGi z#;w}qn;>$+;KuY(3<7SWr*tZ+GVIuS-WQzqL9oT2I$S*pYxhvUDMoQlL=NmV^~nZP zovv>xl2G_IH~251`?d{Yr;nVn^IULdr~_?Z=&~_2CePGI8Zi zv_sc7l|8(Vjm1X@c1yfv@t5QJZRvwWW7rM;#dC?p4z^ypxFqXCl@EH;n#X_qzA7`# zhbO&L)dF91- zB2pmtUE7h9-pB_C6#r=$@F|Q41qEB??45a8#Y~TEL{4t4qiPZ7K zDfe?hsG3oMRg|1~zlwMPuy`>q%wCXMuyYt*k5Tntx4hk%ut(s2`8mon8&hrY3APfJ zwtO8X)AocBd`P$A{@rVh4rBuQ7wH6MiUra-A~bA7)n9WvS6vC{!%N=V4EicY(kOD*K z8eJL7O)$?pqY%cp@^Z*}lXxIBhl>;R-MqHm@FCR(ipIg?y_LNTORqgVN^(@thu7^A zCg+BQBxF1RNR|r2Mwl26hfW5T_xr+SGAT9Sz#bRAp43WM(ypkY1Iq1m+@DjpZnYeIEJ+ z!L}F7m)!x|!Z3pL#38A%@pDU_6(Vk!G?Zawq9RCadFSt#N|zgnguXWIbiVSGIVUwz ziiDL#pi*8(jLf?^Xxh~OR}gmSX(B{IKu8Gj=g@x!p`pF~|1b!dvOlhrXj!|3AI3lQ 
z9VyBZA+L=hif^&eYxRqABdBQSH*&egpDHPXilYu@hheC(0%!fa#9i;woG07wpse{bQ1Jn#1F9HPfD$>4eU&UZn$EF1yP{=mP%+XUv}VBKyV=9LK3pk!w`@&zDijnCT8}bZPqnXwUf}=@a|`my z-*vtvZbbN=K$y3ad|7ru}UX|5p}Pg5O8*>ki=841d?tzv2M^ syk`KwKh*Wt@V~EwKZmCf{S^Mkx~L!x3wgT$021Wm0}*f|(jVXc56TSxzyJUM literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_common_headers.xlsx b/pandas/tests/io/data/excel/test_common_headers.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..6db5d1bd089f9fd1aa040967b60ae93c9caf5139 GIT binary patch literal 8646 zcmeHN1y@|j)@_1oa0wnHxVu|Oa2f(Zg1d)q9D*iT;}YDRG!irrpmB$w!8H)vT>@Wc z=DjyFnfZRfd(~^*ySmriXWgpWyLQz%t*(TKLgpa}!29bC9iFP`7ZuCXi5 zS4pAq?*;bYvj8PpJL9`c&Ga*EZ0<9|Zg6FCh-?Kv;(wX<<~bU&u5xs!3k>c4tbqjy z=xx<9Bp2lD>e*$0k&wlK^mGrh3CX3n$&8F5EzaM8p#Q-!f)< zwDFd@8?Z|CiiLaU+Ll$d7x9GpOU7}(jS@`Q+Awo+oxQEKsM_(PTE& zXaHR&DYbx^KJiW(n>|?M3XW;USr}eI`P?E%rNZ~9swDihRIfY6juqzf%Et@F6a_g| zzit#DvNmGZ&y)(F4IG;WsM|qmh|72!zr5*`B+64BIBcg`xM5f!py*s8H*d_0jxkpy zeeA%tT+ov#rzR}O66jmh+xJPRRc+6V!pE_26m<>h>E%7)Io?*|NvjNQUn?#Bn~=Tb z-Yrq@-H~N+$|MXiGiEBN5~ZAkCM1Vvcoc;A z%&*z^wtsFxEONV-_I#BO3c)87rK@u<3r@Orc12}j1ihDchOTttxlNu;o~Jxh^ki~t zj%NArsW?ZeZ;4KR{7CL2?iY@ikI)GUD1wQ_Qv&q5Rdg3kFUt`oWi8-19ZMnV1)EyUl>7AB)1F;D($cgRwy8AD1c^NMG%~kuJ&{X;5&ZCB zS55ELc+7`?$@Aq|4@35upI!~$(Ljc0ryzRAhu*`!KnQj5BHW?=E|LI=grpPrh%~}Q z@(6&2;9<}CyH4DlKz621PIf=t>))C|fLjwCIH&eCY0^Hqrx(J9#TV5rCKf*)rUk>IS2wKyCf$&0}LKor?& z=wt{H&2+DfD)1RH0sijx&WAlroK!GDgG5{}TPC5<_e&-QvXH#O$8x6)W5Im3!(6UB zw%`%UemG|aYS#R(-)iob1=iHp)1)$6uHQGv&Yh!V1$t7Co{*s{h+%sIMmHyDSufk3 zo}(Q%-CC-64%@ZmU1P0Nd{Ck)Ku;LJ7ya= zJ+NHs>zQ1F&-C9Z^YE$f+&-KxF!-wb5dH+5GJhruR7>9}jf>DXtLEO{qi7llEt^s6 zlN_W*MQg4$Iv!EA@>&B=XB#?h6>}to-5xJ0-$Oc2>C|Gq6m1PLBHO0B1&mo{REQkW&1DU>8tc`UTXv*i0 zwQ4HB*3o`7A=mZ->M(<$h@q=dy4~-Fd%Txg**#Auw<=ERQ)?=$;<-3k1N6p~X3h0) zVbf(rtr78i_)4X-(5@_fid~9Lw`)T7q3t7AD)z6~B<8vSbE3Osq6pCs{UWlRgX3FW zKrewy#93;`D(X4z1^2uXl_N^9@KGmm|kC8aH64a@1?cx@CN))IyT3xN%JS| z7y_5y;0}p`z=K;?;aaC|oCZNPdukdx%36g$yppOwW%^G%Qv&4 
z&@o6bPmx<>l#WB$)){~WqhYqDv-M4Zb~JlC6TYzMfhe3S^1EqAmV{)P~GsZvM>rGZpJ&{DMX_s0}wUs7{CjaLRKb=#QGp|Lw53h7b6 zWfZmLcZYfD@v4a-owkEvTQbCI%96c~mBxgwP!EZFJWAj9`%8R5dkWsAO4ICl`d4fS z;{0yzQ07Zw{DUZi*Bd6u-e{I{iyl1~TJ7Pn)D%l^^6U*BnC*smkv})!O-qgr6UF%9 zDOPPNnXc=*&8RJe`3AE{cXg0pw{r_>We&@pAW_QxdLciKd8d+=$e8?X4H`Ldk)~nYeWrg7|Lk4C%Y_cU7@g4bq!B0iqyir%J{k zxs?sEw}@op^t1cCwX?XW;EGU7UHlwJq&~sJy@uQm$out7yKKgWVh#?w!Q6AMW*jzuPx9^tyqu3H8nHY|7ma zrUPNch7t5u!`*i`s&0l9B6Xv_VX7PB;4cecjj=RUj2g3UP#F(}!8=lJt?lp+D#0)b zqBbnLo#N8sk#Gx-FKjJ+OE9IfIn47<7K60@>H;158>z^&b@Mz5dx?(8ftH z*dHTJYO3yH2;z zeNVHGkQ!W0PYWCq5xE13o!VA)w+>1Vvzox6{hBw3d&K~?2!+h|de%V{&rvfVP0xUR z!J+yrm$R=Ape1XjlAmtC&9fmweK{8r;T}{WUG20SonczL=>BwS(h@xpEXF}N=i@B_ zrx=V5{#)Pf@S3?@sJr?$0b2y$xAf957;omI^H|l8KTzWG`&6HO7{|UXuLWoOW@@K9 z*5tM)@%X1>F%_B1h+S{pYa1E6^J|o`<$tc=!Mf-bT(Op+MFqE#3sCt*2pGf2h+WSZ zBxs_njbbDUo*5os`JGQ`wXRXnj5MydJD;Jp`|G-H5Wh7<*r+gEO4lUbl} zPF|`05vJ9Yc*MMoxnwgXYkp}j=~0kd@}*7W!q}wA_-WY+*_~(oEtk;(kuw)&QnfK? zD9$*O?WHtNgF(4=msmu{oKYU?GWHbAui}`H<7+#H2mN5qv}{>)myIkgbIWeh4JeK8uWDgeXLnwF><4a%O1yl?aqBjFQ1h~ zVMI35;M?D8APKWb5Bs59IsGG_+emO*R8awdZknGb+uxRY7i$X#3(nuJ+&@{pt2+cC z7bJv{TuWks-EaB7MKdfef3c5Wpf}G*ps20e1#0v1#5IuD4sON?ad3M)aRCJN)>viICG}fH#^(+~)J?_0xi~6y6 zdc<@x{rG!7J0T`MAGPNR5qK8pj#rp$w@Pzaz92k@KD3@`#49=bocGiU!gDOdU1-6v zpIR-20+tvfdY-q_WfAvKPzEg~6SEd*l#$ZKT^a_Os(ZiTp{3i!PE_>sE@@I*;f|HT zj$m4PS^AM7=i|4IDw)uCn;QF+w6n)1ncSjOtKMT5O!)Y|)>jNaHc09@iAS{=<D<%dfo*_h{dmZ&USw2!Rp3}5UlwE6ASC6MRpP$V@$Bs zuP1XpzdX(qUw|>#A9gh~m31BquIokY^GE>v zA861ktoC(bsg0mjeS$CvFL%x{jHBT@1>-Xse|o`;t*CWbArWv_NEcqm#gm_CU>v4H zu5s>R+S#XEcgvbRDM-VaDeirDxOEZ&6Zbpayy`HjYN%yVaztg&)ZCwpHoE`ugJjVN zc6YdaB)B#4HIjdZqC*o{*b9}rNPqOspY(p)Ya(0e_GWn1;?Biki zirE1p#3TUc<_JMDqmxUfGfoO`)2(=ap!b2HKFxN!Yvo@19EDCda+4U$Zl;m?sDVBz zr9%EA^1NAu8>zdU*0Yb_fEABQOsV)YdRgUZJ^ku}MeH!Z?q(I^mJtHb7 zE=!?a#XVn-Uf2G^^6@9+HQUsLC!yCC!;~MJB2t>ENn8ReAr`Y@bm|TCBPP598x5l`9~&o9?kYlAdxy2o(v1-;;!5qlGJ}%PSS>D&5*#Y6 zoSXwah?&Chmh!J2$t@@v2(72T@XCn7QCa4Huui=O8E2kC-sFn=3{%*NMANlO@*>{% 
zF?h}D>r~xiY_ur8*MdamV@pPJUVlD>3$zOqGYLk+z#>@)3?;gIC71C*&wf84Z8}Zh zAtIr?b~8czT4dLku&O7w?)_Bm2w)WCX#Jy`1d%i*iw(JeYV-?gHajot%=y;WaZvDA zPc5+FYe{*J(e8MvuJ%XhtYq8Lp9(0^Y4g*3cDe|c>PW6Ju0WC%HqoK? zH@e-8DIpU)Y7M3hJuj^;A>%m~cWAod`tTEjQUbvz+D1C@E{On zqd}6|mFv}(S2I4&2Y56YH?{rQx(;r(?<{M@nUI=Q^G0^Rio7!{(?7uWTBsbQ5Ibz9 z!^WANbX4CF#Ha<;>!aB+Sor4Y7NaIAiWb+C|H%G9!)j){&iux_1#n^ex$gcS59c5O zeO@qpJ81ylPLlp^zduh}U~3Bt7cl2Vm9z1 zS7Y*9ty!=!GFNSe`Ae;(W<(pE)O@WPjhMlRA48JoPWa+tlSDH?V=@BVu7v^&Bd=2+ zmIUJ!M%wYBiZv!G!z?pW%30FmA!~bSn>3T!Nq}IQoKbG}NE_LKv8K1FTP5~6ZOXJ- zLs%`Pkp=q5YzHKhiR-iH^Q9@)o3Xg1`lMA%NFmC{2H26pGK4P%vwaQ1wX>}GVaP9e zYZqz>IChBf?11Pe^Rmn(o|m6&@=bRzZhS#L-F@-xS&(RVmhm1=mdQZY5(|^U=XFu3 zoO7`HefDnDF^hXa6N)8`N|FVHmhAZ{1&}4;z?%Tiinl|kI`sxD^I>9zk{OA9S&h|K z=5=V_37obn7$NS4A|Eqo(pIW5nwCjaa$hZ@yBpvkKUF4CO}?5?%w3dRLZUdeR>soz z3&!|_09{cZn*p=0*QH&C(vpWYW{u#H37S4+exaqzOIYN0%KthP8Jg4X;Zs_-jr{`{ z%Re5|zx}yaC5+){bxpm}z(S(0!y7kk%-AI=(tQ~HWCM%u*+6bWiRfu8cSqdwKE$^1 zXETi!3kD#Wduq)1P#dSci?T#`1l7kLY&B z^~189rATHq=ikaGg1xpJzZ~|&+`_5&_mqGT@MNb3PEaX0jR}7zD%jctWMQu90{xkI*=#ExB^#U2FF#c{C{S;s$IxEFnl2Nj`7x@XV`qpW2qa%5n%IyA(~Cxq$vm3Jgc4mUnAzm~{%za~E2gZ9Y%- z6Z$9*H^9=t3N6!uu7~K|7z~&Ra@-hFJYDbRiLI**g9Vu?qZC!`7;>agE3JWshHJ_M zg;-)_1#LnI0*CZA=;O$eIX7kKx)zTCT-mLOw zb9KP9Gj5+dBv?6B=1pl%%tAS7HqS-`7vco7W;qgXe`!B*v=##?5;-Pg55cqgp;W#4 zV@L(k;arE2?1uckhLODoj>mFdT+XzDXW#z*l+uer=hU}HuF3Q}=JJ-z4XH3!Vc$Xb z(GbY@I5#WRLbIGI`cP=>?vz^H=){cMY$P?Op+s)+ zK28R!m$G?KMJEUt{-7DX_UuMc?0||x5)w%2gU#Uq?Zi`<--4*w#>;ee5|{K6XR?g) z^du&j>+SjzH|&9n&`S`9ZDt2X_`WW`;c}w%djHbGll7=|(+Oh za$eE`x217eOXnsOVYp&K?Q6bN&dMr!#2G~~dkYno2*UeLXvN0~#h>8Cmf&NS5o}|Hmy59d?_%pBx{;vOj_2bt#zb3)|h;#t|Q@}4N@n6BerV0N5V_^LT z{v&DlEA-cl%pcHMoPW&E{2JibJ>?$(bcz4-;(yy={uj*8sV>I w`Uf5W*n~&;o3;K5|LaQlXSglJpWy#m7uA)J;j;?>V8B2AaQEh-{`u|y0027e^#A|> literal 0 HcmV?d00001 diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index cbd241ceda0b1..1ce447828f5b9 100644 --- a/pandas/tests/io/excel/test_readers.py +++ 
b/pandas/tests/io/excel/test_readers.py @@ -1278,6 +1278,13 @@ def test_ignore_chartsheets_by_int(self, request, read_ext): ): pd.read_excel("chartsheet" + read_ext, sheet_name=1) + def test_dtype_dict(self, read_ext): + filename = "test_common_headers" + read_ext + dtype_dict = {"a": str, "b": str, "c": str} + dtype_dict_copy = dtype_dict.copy() + pd.read_excel(filename, dtype=dtype_dict) + assert dtype_dict == dtype_dict_copy, "dtype dict changed" + class TestExcelFileRead: @pytest.fixture(autouse=True) From a45535cbdd85323a397872178898aab15d28e3fd Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Mon, 12 Jul 2021 21:40:28 +0530 Subject: [PATCH 02/15] BUG: read excel changes dtype param --- pandas/io/excel/_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 6c9f61e77063b..07cbde001a881 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -479,6 +479,7 @@ def parse( mangle_dupe_cols=True, **kwds, ): + if convert_float is None: convert_float = True else: From 6c98fa3169629573ac21e7a12584317e17c3c398 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Mon, 12 Jul 2021 22:13:34 +0530 Subject: [PATCH 03/15] BUG: added xlsb --- .../tests/io/data/excel/test_common_headers.xlsb | Bin 0 -> 8025 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 pandas/tests/io/data/excel/test_common_headers.xlsb diff --git a/pandas/tests/io/data/excel/test_common_headers.xlsb b/pandas/tests/io/data/excel/test_common_headers.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..98a38ab17f5c2e7f63d75377bbd9099c9a90ce6c GIT binary patch literal 8025 zcmeHMgejSmad^oN*WPCQaT0U8};5# zFZ%oa1K&OK>~o&;oU`6NXRY<_{qD6@?W#xtk^Po*8w=n ziE0#jT*3kg9E+(%E)eP5K?BWKFYXO-m6}4w@aNO_tL`rjS*mK$&WN;)%|zZh_oFd- zKAx*@zHNP#B2}Z~g3?q^gq!I7V&o@1tYqw2lkR7CzQ$u0pcha(uCzI2!v!%b4I;oFm+6y0`aUvcatlZHB;Ka$`T*#z4i= zl~rK+qY(WUmB%vXoKQENSY;6L{>P5jHNn~XAyLhkwNE`r+7*qbkX+lFb#A$^5p~pL 
z+Hb!NpTMS=U5jy}8*}FkpI`MT6cMN?EzozDcapgb;E2>in5HWw&F(}X;PktS^f%8Y zKQvsTfpg>m4pZHd^t-|#ZKxE@PYb??HY3&&jU91KR1USs>4_O#0aq>Q*WJUBjHZR5@q!j!38vBj7f^sn zewX%?#@=XCX}U<4#PD1CXeM6AAbp>wbv|pcs$|cak3rzugiPyd2P0viQ$Y|jv})kF zW`_hrXqLFLXJ+&hwM59BtH&bEZ`HljM5kY(s0@v+4NcYL+LnAG9L>OMbT&siNS!9) zW+La*li@bZxrA10l_o;1LCauR(5~W1`amS!Pxe{eH5#=8BkMMR4 zGlmIx0kiLhzPc24E7*Fs#GQOf_RugU25Oo_M!`r*Y=&gW>t+40ci^CiW|glifbJNr zzmPzue}f0BD~>Fha;J~-psz+_Wp{q#6<^3+T5kyr_c8H54M;4m0{uj zJ#Ubn(DrD-a$z6VuwZY%^5)l!F#`$6cdFcZXjq(vf1eYvlvF6rRBx3a0eyXuv?``oFO1od8D)|Mrd z@-Fu5a>h=sy)it}rtBlx_t7Fpe$fA29W9x4=jt9{uj+$L_dryqgvUcX$Wd$fs z6wLLGy#vvdQuy_R^^ie&cVBb8faD`S9^)@kctB-T(x(viU=!I#9LGP}QusE#iF!@~ zu4aY!rsOk41|xJnRfn)gQdP-aB&Mv*xFx}Y%_KRjOw7+ZtvO88PRgNI0y|(YC%^6VVY#nZ1h4>II*_(%u!IUQBoLTBE*tzS{7C z5KFy8o&S5Cy2KuCpN?apc0TvYc^5~09U>t&d#h?EV=1f67$eCtbgZ#E- z@tiSKaiFL|=)&BcN76pmCFwbfHPTi>K?QlSq=&U`PwAU;6^(>Cx*A>O$p8~#8%FLf z4Dt}sqJ_~gk7#LugWU234K&TxtQIJjX$X}2tHUy{lg9`d>X61nfc?-lm2hFIfKr-z zG?fs{f4^sUJ0W(9KMDZQP4P|lf6s1bORz1N?faSIo9^G%8Hym5CTL|_d20EEb*o5^ zJP@3tth`M$r(G!?{XkV#KYf!Gq>)m=9%7VnQdu5zs~*vDfRdS5hJrTfk^}iA^@a3y z(Wa)`bM;CKyV)cVLf{m%|%4w9A8u>nQ z#O?JHt`bSFW%A^$>gJBo_k>ZmBxT;25K12D!}r3=9c!%t3BB4&k;y{NOMpCoY$I#A zy8YSM_)X)+z9^9U;Y(JY!E^JfdY6 zDB0jUU#8CBLhZ;L9XhrKJa2qs>@3~RqJ>ChcfCzUc? 
zck!n_p-w#NP($iD|D{%jh_DqT8zBhm!(~SqQ4SbkAW^F)kILdnXMe!$1Ce8u%1NlHm5Sy zqu8VnZlB=%NLn37?^>9enVfRRS~rkm)hV=M%k^f;+J+ph0mrx%^`aa@1l@57+Z2toK}C&)O;8F7}Hmz&%jNX za@z4U6tCS>rj$1Kp2!3b)6U*1>0t30Y1syeu~y>7!E}GAzEbMws2)r4TcM8RY$vYH zX|Kz|Kv4(_%S*ynyHYV4m=#RrJNE=MC%poYs%2l7%_zo7OK4LHbOUQcNA^9L_KQKd zR4vMfK6?#Thm`7k7K}b3`T4YaLjEUM%UxCpDDcjA+MBhuI#G3Mb~b&lb^L_awX?^{eg0WVf=i#Kg`phZ{9}ic4_7F2DR8U`nGZjtY6BxeUJte-64(Wb1-=on$~bTy>!`>TV9#JNW1PIF*lDX|CrGJNUW$ULee!@(PBCwpF6fzOF$q|5eLub?B@kgbMZa7*Sqk;+!9u`> zqvVYYJn_!EOla`@sQ-Bova#XQU40LryRC?bpKixg>XnqQnG`aMBxoDU-Z4a*#gaoT zh|`Ysfti1DUDEQ=yQ$9|xdacv=nu(*G(~e}Jjo-9zT91@bwW@aR1n46ZTiTwNf|VV z&mwR|q7?ao!$52s^|%N)iu9;afkv6*kTW^=i%83ykB6vD2B={U1a<));Sk{EFm6a{&v;FhC?*RRD86NMk&$tC)xz zY&4Ll#dLsN6ctXH|Khfg;=&w5U2~s{o{yLTP1{oaR}zg=c%=4BO$Bq1TBO@}dH>8z zfxZAY@DflHfsaPD#l!u$ZaZ-bb4R; zc-b|N9PY1MQm0xk5@*b7psw z0;D8I?Q7(RGBvl5hwQBHI2)STViXH1lD}YSYf70ax&EUT$^y|Qx5J*0z~*zRA2aqh zJ?dl$20J_bwWRre!|~q*jm=2=W0;UfUb@KKHF7pgJ!0<;Q=2x9vECpPsd^6ON{j-_ z9h@B7exBY>Iup*ist(pzppSsqWLoR0K)Lxxs{D5crnUs=8G13Ndd-aJ{Kp;KH%6*W z3Tbq_aW!reAjmUph+9_xkrHTWmm6M~U}`@etWvTp ze<8<|bQE8I%bC4_zXMAY>xwcN*;xHsn+SSLK=Uddm48qj-a8&(h-a?5`}T!|%B8y4 zyL)E00&H`S+)j zpu5ty1h2RPxn*Uu7p|JE8Ylbly%mP*4TtNg^>)Cd-T@V2MI^EGQgHNn^(u2Dw*4CJ zf$&=e%hm7D9i0IYiCwFb-6;uH2+C4CMyUW|_qa+1YsYG*Qdy2#85x9Qx0|*=%B{H za*!AS0Kon=2aO#Z{!PJey!~;c#wps*v15enBAk$8ED-Z8s9^XPXUhm2m7BoDxmqaZ znPyc|hZVL+fe04_eD|2#KpQq+OwvGyY0C^j=k1CfA+{76yZ;5(BRI_O{8JL?4bZ&L` zpUOV7!bUEaJE9f93xh+MNrEkj@S_; z4^5-OI%}RuR=B1x7!H>X33uz5!So&<W+mVOj^%J142M8&h1W6#>O>@?fuCGMcR z1_s&c#bTQ-3v8DC^FHClXh< zHTNdY&3fKXq`PRpRrhWh-z-G^G)_nVZv3+>b<^}_rRAsT8SsyFmzw}Lh2Kwr*RaaX zUuDVvPX^uuys4vp0v5w$6%6n&{mO0f~6h+fC2lF!m^i^ Date: Tue, 13 Jul 2021 10:47:39 +0530 Subject: [PATCH 04/15] amended test --- pandas/tests/io/excel/test_readers.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 1ce447828f5b9..085d600395687 100644 --- 
a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1278,12 +1278,23 @@ def test_ignore_chartsheets_by_int(self, request, read_ext): ): pd.read_excel("chartsheet" + read_ext, sheet_name=1) - def test_dtype_dict(self, read_ext): + def test_dtype_dict_unchanged_with_duplicate_columns(self, read_ext): + # GH 42462 + filename = "test_common_headers" + read_ext dtype_dict = {"a": str, "b": str, "c": str} dtype_dict_copy = dtype_dict.copy() - pd.read_excel(filename, dtype=dtype_dict) + read = pd.read_excel(filename, dtype=dtype_dict) + expected = DataFrame( + { + "a": ["1", "2", "3"], + "a.1": ["1", "2", "3"], + "b": ["b1", "b2", "b3"], + "c": ["c1", "c2", "c3"], + } + ) assert dtype_dict == dtype_dict_copy, "dtype dict changed" + tm.assert_frame_equal(read, expected) class TestExcelFileRead: From c10b931fd59bf38ed95e2fb7a2092327d9c9b04a Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Tue, 13 Jul 2021 22:45:57 +0530 Subject: [PATCH 05/15] modified as suggested --- pandas/io/excel/_base.py | 31 +++++++++++++++++++++------ pandas/io/parsers/python_parser.py | 5 ++++- pandas/tests/io/excel/test_readers.py | 4 ++-- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 07cbde001a881..4d6a766ad6cfa 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -358,10 +358,6 @@ def read_excel( mangle_dupe_cols=True, storage_options: StorageOptions = None, ): - kwargs = locals().copy() - for each in kwargs: - if isinstance(locals()[each], dict): - kwargs[each] = locals()[each].copy() should_close = False if not isinstance(io, ExcelFile): @@ -373,9 +369,32 @@ def read_excel( "an ExcelFile - ExcelFile already has the engine set" ) - del kwargs["io"], kwargs["engine"], kwargs["storage_options"] try: - data = io.parse(**kwargs) + data = io.parse( + sheet_name=sheet_name, + header=header, + names=names, + index_col=index_col, + usecols=usecols, + squeeze=squeeze, + 
dtype=dtype, + converters=converters, + true_values=true_values, + false_values=false_values, + skiprows=skiprows, + nrows=nrows, + na_values=na_values, + keep_default_na=keep_default_na, + na_filter=na_filter, + verbose=verbose, + parse_dates=parse_dates, + date_parser=date_parser, + thousands=thousands, + comment=comment, + skipfooter=skipfooter, + convert_float=convert_float, + mangle_dupe_cols=mangle_dupe_cols, + ) finally: # make sure to close opened file handles if should_close: diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 7c9fcde08bf24..b21ac3bfc7e7c 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -81,7 +81,10 @@ def __init__(self, f: FilePathOrBuffer | list, **kwds): self.verbose = kwds["verbose"] self.converters = kwds["converters"] - self.dtype = kwds["dtype"] + if isinstance(kwds["dtype"], dict): + self.dtype = kwds["dtype"].copy() + else: + self.dtype = kwds["dtype"] self.thousands = kwds["thousands"] self.decimal = kwds["decimal"] diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 085d600395687..55dcf8580a32f 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1284,7 +1284,7 @@ def test_dtype_dict_unchanged_with_duplicate_columns(self, read_ext): filename = "test_common_headers" + read_ext dtype_dict = {"a": str, "b": str, "c": str} dtype_dict_copy = dtype_dict.copy() - read = pd.read_excel(filename, dtype=dtype_dict) + result = pd.read_excel(filename, dtype=dtype_dict) expected = DataFrame( { "a": ["1", "2", "3"], @@ -1294,7 +1294,7 @@ def test_dtype_dict_unchanged_with_duplicate_columns(self, read_ext): } ) assert dtype_dict == dtype_dict_copy, "dtype dict changed" - tm.assert_frame_equal(read, expected) + tm.assert_frame_equal(result, expected) class TestExcelFileRead: From 4419146925b92bd8a91ea61b03adffd38dae0a1e Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 
17 Jul 2021 21:38:22 +0530 Subject: [PATCH 06/15] suggested edits --- pandas/io/parsers/base_parser.py | 3 +++ pandas/io/parsers/python_parser.py | 14 +---------- .../io/data/excel/test_common_headers.ods | Bin 2941 -> 0 bytes .../io/data/excel/test_common_headers.xls | Bin 26112 -> 0 bytes .../io/data/excel/test_common_headers.xlsb | Bin 8025 -> 0 bytes .../io/data/excel/test_common_headers.xlsm | Bin 8696 -> 0 bytes .../io/data/excel/test_common_headers.xlsx | Bin 8646 -> 0 bytes pandas/tests/io/excel/test_readers.py | 23 +++--------------- .../io/parser/dtypes/test_dtypes_basic.py | 5 +++- 9 files changed, 12 insertions(+), 33 deletions(-) delete mode 100644 pandas/tests/io/data/excel/test_common_headers.ods delete mode 100644 pandas/tests/io/data/excel/test_common_headers.xls delete mode 100644 pandas/tests/io/data/excel/test_common_headers.xlsb delete mode 100644 pandas/tests/io/data/excel/test_common_headers.xlsm delete mode 100644 pandas/tests/io/data/excel/test_common_headers.xlsx diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 980c75b370bd5..6488c60609dc9 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -532,6 +532,9 @@ def _convert_to_ndarrays( conv_f = None if converters is None else converters.get(c, None) if isinstance(dtypes, dict): cast_type = dtypes.get(c, None) + if cast_type is None and c.split(".")[-1].isnumeric: + orig_c = ".".join(c.split(".")[:-1]) + cast_type = dtypes.get(orig_c, None) else: # single dtype or None cast_type = dtypes diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index b21ac3bfc7e7c..35120c7a791e2 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -25,7 +25,6 @@ ) from pandas.core.dtypes.common import is_integer -from pandas.core.dtypes.inference import is_dict_like from pandas.io.parsers.base_parser import ( ParserBase, @@ -81,10 +80,7 @@ def __init__(self, f: 
FilePathOrBuffer | list, **kwds): self.verbose = kwds["verbose"] self.converters = kwds["converters"] - if isinstance(kwds["dtype"], dict): - self.dtype = kwds["dtype"].copy() - else: - self.dtype = kwds["dtype"] + self.dtype = kwds["dtype"] self.thousands = kwds["thousands"] self.decimal = kwds["decimal"] @@ -420,7 +416,6 @@ def _infer_columns(self): counts: DefaultDict = defaultdict(int) for i, col in enumerate(this_columns): - old_col = col cur_count = counts[col] if cur_count > 0: @@ -428,13 +423,6 @@ def _infer_columns(self): counts[col] = cur_count + 1 col = f"{col}.{cur_count}" cur_count = counts[col] - if ( - self.dtype is not None - and is_dict_like(self.dtype) - and self.dtype.get(old_col) is not None - and self.dtype.get(col) is None - ): - self.dtype.update({col: self.dtype.get(old_col)}) this_columns[i] = col counts[col] = cur_count + 1 diff --git a/pandas/tests/io/data/excel/test_common_headers.ods b/pandas/tests/io/data/excel/test_common_headers.ods deleted file mode 100644 index 03595c18e8aead8b6e401eb38caf6028b42c4dd2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2941 zcmZ{m2Q*yU8plT$ErVeaC4$H$f*})Ei54YF$mn%M8{H^l)V!-kL?>DhQ3k;v$f%Ks zE^2hc5WR;GE!rD->pkn`uDj3L>zuRC`hR-D@ zCQcUz7lfy;8^Y4f&Dp`)($m2eDdL5+5q7miIiQ4H-4I9{S8KEj0_iD?a`Qk~+Mw(a z2u~O>CI~?JD*`v*C3WmoBpCqkh`7!9PAE@bX9P;v$HlqDpc6R(0XBe_<*4cW68R(r z5~#I3?7vt)+MwzO3Eat64Z>IzJf5D(=hMhGMTO6dFZHBnZ(i$d=*TP-H|wO-uV+H& zF%5f+-(B7><2t1O9MxLUdS4Bn1$OLjK3RI?9U-b^Pq-zvr-z>$11o4!&R`0KWT_$7 za99@c$swkFPz%O@EUB}={K}{-8W`GdO0edp4bbTGDz2`CFYdWGqxaf0HVx(m4eab_ z`9%mX@vBe7dF*ceQN;Cj0k9|5ZR~b9l$DE9go6qO9lYx%1qdv|8yhY&IDUpAz83_D z<4lQ(4oran)70K5;u-&3E(dGCHL;o=Q*!OlVexG=kdrK#)~YrboyBFjpvYk@9b4Ix*t@w{*-?%v$Gy&Nv^k`3?ID6j7%}sot$P7od)qJ$9Ql7 znJDDfwisFMlh*An>6b=T9F#1HEe#CBtiE61tfuq=*A5Bp0PmpZ2TPfr6wt*s)zNzk ziE)^GpGarEr8H1u8D>#CF;96o5IpMq>N4AisVRf^^<{v2*CkfT#*t6|`jo3L15D0} zD*PGPsar72Z<_iJCNET64ka6Dlo{202oS8JqtEcnFVK;f53yrJL&vaTeCYdj*o3M}-I) 
z+vF!lY=M&M*KiGs1oz|zY4v|HS22v39E}<_g`~fc38dy2nu%v}67y(blbvO@T7*mL zOg3p6X$yW%)d3!&4f_Y;^Bqe+ly*((ok;V`C%#&C4=j9=ppm`@1^~{^$N>LOrOD3XQXP>>YeoQomPn4*hG#lh*86USwD`&^WM!0%|y58&wO_g@)8gS@#4mxHBmnwsQFNo zRrcvcsM9(}EV)RgLGNEP_uB5c#q~W#IA_WI)t70QC zEGaZMVV9{DYhu+XrBoSinyN}xqHwKGH9LR6g0wlGq-Z<_a^sQ-Y~V*56CS-SkL79B zQrqH*6@98(3Jr}kxS1i+ITP;Z*Tv` zRoE6gsXB$8H@arnKQ5VJQmWCLD)z_NSMKhCFGE0jP8jXvrSGRJ+$P`q+hiFlPthpL z-VuRaI?P(U;kug#2Nh3}4A6i1tyG&M7Ou3%#K!AsvD_@fUr!HTn3H}om$^M-yeV*+ zDL?G5+)_%W4vcs`TPc=Bv#9S=eA}^*U#sna9389>e+s+ViZ0h8#QGgi2Qsr~`}v1I zS6x@fJT{W>0n@Gv_omdp=FQj#Xx+Bk-2*bmk0q&oeCzftfIs8t_w_TJTiXPqY`v{v z?!>bKIj%mh?Way!q-9hAe=vld4#P29>>V}>3$%pwQ}(jnnJ^By)@kuA$E5(^_%V^- zza#$a^X2bjM6^Mq000Z|uBoi2Af%?HBI06+bg)IBe!-57Q*a~`rBmL2I`zp%&Z|=J zX6CwI@E)LP1WIWt&a2rmDXV3f;H9maEv?47`7hr9i4?4s1gdC$xA(pMQ;_4sA1k}< zM-y8`EVQstvr7%;@w=8!{=$Q^pxluYG{b5w)gx<&n}$Km(!s&&H)`;4R`ePQO#7&5 zIAM-cIxkA`ak?Sy>g2omd(gPtQG@Ar3hQ8nk&zt*CrOI9zhY#xp^r(ry~}3Ad6i^& zBC2B9jYp!K{Z=0j1>2R6F3HI&@~u2Ly@ShY3{mwznk=~@Lj2Lcr{a1VEnJNR07xUI z;+L;Mcv_wxbx}I?$mZ)Hg5WL}us+)TnoHaUoqoMOJb{4#{Gg^SsuZ~PW78>wQ_>jE z_|2D)YwFw=eSg~G6|}Ae+5;gKdMLD8-f~3H*ba~1F&j0zO3!yn@~nFJ1a8SdB@j5Q z@{ZpAHT0|GYU?{|cfHvWN86egF%DCK*5(itsf_j*M^7;N_$ubK>)KlsQb_%H<>Wb| zhhvF)v`m5t5C*|`hUO=J6<+IXz!a@a9<}xcIJj^sFE`QT!6ys0` zS(qAI)u(aFkUXe{F=nqp6;t|_DgnxwHB2S5A6!pll3({bYl10i86#(PG(>9EL!Ua) zf@J!qf;eld$8uQLFHg}io{)a%OCFk3OtO>ItauhWzzMHF^ct+iepzReFoNIN=7g8u zvrgQLaxjT2A2OQ$k@k%zLudDYl}AD<^vm86^AGsDkvw0$82`1EFpYE7KU>8Mg&y(o z!Z`k}`@NZ6=vL2L+(m2qUHCg97s4K@e*ot95Wkn!MTjwCr4jA(uR?=qP*9$Kp&(w@ LiG}#!m*@UB{N5Ru diff --git a/pandas/tests/io/data/excel/test_common_headers.xls b/pandas/tests/io/data/excel/test_common_headers.xls deleted file mode 100644 index c02dc69aac93b80661425d773dbbf4bbef12d1ce..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 26112 zcmeHQ30zIv_uuz+s~e?3BB9%;G|#5o6hbBQ@C>Qk2Bk=mAH90XkTIE+=ix z{Ri6*cdjRlqaRTtUxZpjLxRqNYZp;okHCGQ5VN9RXSfEE>iA!z0WB_cq=q{ACisfx 
zIl?9yTte#M_Yenjggk^aiOeJ&2nmV~Pl%r#ACbrlj);g37x2c+79{ZgLx(RVLnsSn zI17g74Oc9Z3sik#Oi5-yo!e9AgH*Xw)Y+RDkafM@v1WSM`}Ty|1nP}v`%vXpQfG7O z+z!qxa-6Djo1(aoZsawT=aMWmbzrT@@*|-nmPC^_Nu;E%wXdNq z?q4ZEpoDTuU=gjaVzl`R2Ag#JN(mNNq_Unvt!pX7=*Dp)>cE3#PKtTv#DdsEvLMbR zgxHckFt}|9^Ufr=(~aW|&cBPIY7@ywA|TVqWF-K6qDi{S0yqM1W!&levE_BI zluIHB7EO8(SHcPrx3~x4$>>A+E^4P)V<=M}(3?nOSl^sDK%#YtcYLVpZX8D_YfDIo zO%I-hy@iEyh;1Ldo@7JiCeb-_2Xl(<6JpzqGY@*Ew_@=m)g~>1w2;j$mM4?UnKSeVhGkp$a^7=`lU~2c`7<;;@__a7MPs)VAJ^OK%n%0 zwCN6K8>N3)`auQsOa=5U3g|fs=zA5=_bH$&l4pa0@&yX$itrVcKdb=fKP^u&#TVvM zjQ%>UfPPE?{kQ`95e0O*y|R31dD_azv!BAz717t}>u+qku`S`y{h}|XM?{=4J!48Z zv^?tILnu4^6ADm=$Ubcq(Crn_d&s4?2TTh8T@!rM_-Z4fi|wDbLroFAh75;GQ_*;Y zDK3pe(~U%Qt!6rpkhD%RbOv7?QF*a`X#3F<(QC+5xa=&#(H7Cgax&~QS6|<#tr6&% z9(V0T%l-omuS}lt|3`~3KAdvNZi;->r%K5 zBt?pj1=vIf{J$Dpu^@?_5ba>T!IrInMOsyXo~*Wl0}9eA4xH9jK$qGo4%jEHs(?n@ zstVXCt*U@#-l_^XzFJiQ9ZIVz;K*oI1sr;$~c zLhsgA=p$7D$DgDVV5M;TGrR__AE}AG2D1X2+vHAgV2WnmvLLQ40TJ?*gSfQ>#MMn2 zgy{s`0i@YVWSS!@y0e%odN=ULLP>?OInaK*QU3c?q2l)2NP*-M4f!C#i^@Q}q(R0h zkVM#!D+W# zeVSs9Mo$om9*wAd8KFeoB1~h6H6^R=9^#gv{o^6mr1SFf#2_LY`1h%sIWWCJ40>Q< z%b;JRZ9ujaxKv2@54J*PIwKmGLSdy$D+&t>WsC-COrt>>(`b+e{HQ~A%H#y}=2oB` zNU1i3JBBeH`$7ed*uBcM0`-o~lXPm5E>|=jW&M^p)N1zIkdf_VCa^3BPK=vDWTZnK zs}Vtjq;#?%FkSyTNM)dMkS;9&VLE^2=&_IjfsA~e;G9vhFUMf21v*g<(y=8Vm4V7Z zy0!$w)lE5wr4$HcX))Ej`H{~E!QXr6#uM?d4 zE7l1Hzb(*-auDN|fK&!52Z8wU>yo&-DF?BZ0)dQtonUaISSJ|Rwm>J!K_EQ;I!I-p zau8liKwRCFgLIbyfsA~eVEm?7C%DMd0-Y!aF=+`%WuS5pvzCCkx=DhpWg8GZczh98 zXDglquYY*BATH4*n;S&3QFToRi_=Z011tH?#ES4>`JG4llW_PaAaT^&E>r-EXcJA2 zAfm4Rtxz4W@o59I04rRZKsNeJHmEqyEkz;Aym9UYtw93Fcckak%W6WeD$I|SSu#vR1%0S`M&GOlF zX0nlEX--PmNLpHDVCk(_^4XX$*~qapHzjN&Ev+)J{J~NAY`P#DPvw@z12uryb< zReKclO;;uxIhJOrgpH)7xw^eOlp()0-I#3TSelIzHjQ+%vA)k#YlZ_lpvsc1K z($ZYrgmn?}+3=WbACz!3Ju9{04Y(K-EQ$$g#sHPHB0^rKxX1^!Yz77U zVS-vQKqYO45L`>3p6X0Vt(rl>T9}}=3{Xi6fhM0N@JhJT8L_Fy^An z=JKRPQCEYe`+-Vkz`sxsfDkNepQZt2Z5U-WV6SC!Sss}PPlCzfIu2@)OQ7by;bfk;hGUv0)No|f&=hLR)R5-YfjGeooD58!85V53&CiBvQw!!j@e|$U`Y9DD_ 
zR%aoH_=D2o8K#OW;vy_yCcP^mG~yO;HmhqaEQ|b~^|%B*4MaXJH2`M3t`<~)M@x~;J-Qi^B)~UnKHAxWugiiqj@uAU_!ly1^2>m%&acpn1=x{wlo233LOg_(NO*r zUY3a3n+tDLnhYLHDviK!bd#2qMS$fnu=E7VK_FC<_E~TV-dQ|edg6E)7N01{=7vdQ zu-m|RL5xHYia`5Bo);IYMJ^2&nVTIgofEkn}H5?G!q=rNkip=_wRgyi;NPcoH-uTd63t7hUW zU$Bi)Q364t%NA}YvPFU?ZMgLNlbx`&e5b@cthWLTnFSBGb1W_-LafTzBR0breOAD$K)BY z`_^0@R_3fXuyOFGg-;xY<`(*Jt)8b}Ty=5Bg0nwwwlu2vrEvF{zZ(OqNBidUjAHHU z+#0{{^C+RzRP$Kbrrg~_55D!avurT&pPYB!E4Xg|*v#)bcj_COr@QH0$>~!zUQ5!3 zWNsOmH~;C($iPDHRapt9SwB|))subMV~%y&=d^ZLl1BX6sUdSs&qo!0x1OjK9qKyX z`_+?=n=Bt@9WCK|4%;`>kGt^5+3cDZ`5y;W82LJ0D(?Q?@khJN?N=t8AN|qz(6cE5 zkLw=!_jqsHt}rM&q@5XZEs|&K@bpZd?=xTAxj7;B)ZJBus}^`psT_7rXgKvf|5^JR zm8la`K_fz#KuYT?@8<5`&Vrsip|fzuM@WJocD9ovo-kV|5)&-v$dBAtt^$NY} zp#$%@hgqiXUNbA?mg%PNjURXh-gs{`@|M@s1FQ2Zgh$GpE58pumwM3t$v{q=*Sf&y z$6?#^ogR%U&B_<-t?5>E?8L{LpP?zT4UL(%!Dt~ga2X_gag-P(m?3bII0wO0DN>Mt z7NT7l)TWH5=To1ueQs8rSNKVrfnkN051jl!{Bk?@zxYG-wmY|;JX+{=^}b<(Wr8b?OOL-^V87cW|MuW6KiS9DU+nPod2*+Uqbu9rAM*3;5h)h6S&Q}`yUV+I z%&yk#cl*0jiY=#mFZUn!>_(&Q!+{NU)@|55aLTtJTE-3bk z2?6mYUV5iHK1&_{qV({z27Ti-Y7HH_AC9#cIr8z~AvKN;uM+x?oResmJ<3VF&5r|} z)c?`2-KO&otj_PMO~ZNpbG|HXS6uCAJK~MnpfyHC#4jMl?Nd*;K5l*H|Mu$0(tmW{ z)b{+tl8{qlUVC@Zty_Ab|J~GIe_t@?PGiP^H{20Mk4pGu=g*w{<9YYFRYlsfGQ6`^ z|Kz#%@9pa+|^HcJw?@zFaA9Z3y-te}5m*x-kD6({)<8Ip{V^gN}lY3e_l1x7sAHLD=TkDk7 z=4SOvNp+p+%b~l1)v9K4y9X8j(z_t%N_Cf*wCaQH?(Ux+QxH|J^Le6O?%d1&46paFsSWw& z+}z$lhjuKw^@Cx^lye(Rjt#ne^I(75(}n2=jpj_u+nX1%(0Ay=iZ&&~55#(O+_b^Z464qf9LPzIns84igM{^X5X<+5tB|YyME{#f!gx zs@ME4f4%q^AL%qD*IOt!eSEB-;^p?9&QIu5?95>o0e& zg`EET^J?8aOWOaoEBWu21#=IElzx6yz)$_h^QzOAub<6m9FnnUZd!1uzkRw|?>`^0 z&g2f@54e!+WA zhqcOA-SD=$P#CjOyI1>I`_kPPVt5bThiuc{=W}V%hryh0Hy3l$3monwZcEk~GD${)9rGCruqhF_3`gN?hw?a2)(!{~96Vq(}xNTvd5%4st zY@W_;eunm=IrBGOH8-iS+I%AMl|^~rwBUve_S_MVyShA2pSw9_ zNq)?0wPm+T{?$*H*8r*y_#-8@wH`OGl3F864vp3|_MUD{NIavHzc z?{8K(ePfBytK_Y-E~j=ZzWCsD-=e5V4^DH87epHmezB`kbH)w-n%g|xP5sg}%GSsA zzmSmXxZ|IDjdFTFDgO|ZbCT8BqTqh!;01Ai9I5%RPPnz`;w-O8!DWxe#^&t{DC*rX z^3uJL3y-vGG>whznQ?G$ 
zd1l*?o|avztrhN;b78~60ZE7RSNs@RGSyvE zE2S_dbj0jKC-&DkO*(d-oiCX3oqwEex0;4;i#g^zPFc^WYyOvl_Y`!zS9|r;u;7Dk zyE?F{!ko-`jfp|CCeQDiRN`La_#k?DhJKu<)(qpj4>I+-W&C!1&sZnlM}?;kJaIhs z%y!hX&%C1voonxyhRqrivhdf0xE~I0wvBqR_2%bGV~u?ymS&gm7iYRGSZT1d>-O(U z^mHB^yFC2uyTxaQMhrQT0AEy5eGZ-*vn0tn46o{?5R4!4|vRGt=J`EL?AylYVl3{j(aAD%;cy-i1rG zJ6s9=N}k`D<(YfNAqSIds@H@3u$Cz~`NY-qH{9bw6km;Tl}udwpc$yD<@J zkEVYc;hZ0uJ{>r&bZug!OU#mi#^Vd0 z)Gj-9ZhC0g&(RT!f=4wbT-xuDaChXktT9^7$q{`2+ESnX&iPMrCe#={s&(I&@L@si zfv(HdzV~}+KJfg{f3071+v=pi<9u;f`zd?gg2HOfS0`T|H$9+X+|#EMXL$Bp{P=ph z2ta9@i$8J)>z+NTD8AME#t9=jwj!8#EX|Y2kg0yOF~a?jy}0@YeoBT z*LyBedpmP`KVLs<%efgRUaayNX*4XeuFteXQ`1+)d%f%sxhB9S$l7wGjibw^gm&GB zuFb7lHu_`Qfu1{8k9};ub65YLPgV_XOiKCTO%7|pPnM-&dh=_E@rL1BghxL2?3@|t zWNI||%ZUE7>lUfo>?p5%V3D&okaKBbNy&x#;-<6M4zQ6%(X1AVNgQ(KxWfaa0hrUg z=GxAnPl-t%vm$fa)LhXR=97}0z*IjIi8+U9M!G2viJH+cF_=csj8O-Hhom%0Ok#L| zI&SrVcu$>7hSt|$+(L37(8-YqE#L-O0kjnoG%QC2In?*IdQ#upwq+aQZR&tHpDc!5 zT&G|In+^o07|ydHRMVxd_2EoleU3{$?Rirj6gHe6LeYCrbUW&tPEpdRpoYe@f*_g@ zAJ%tp=Ntq)1i$7b3#rl>)Y%$JM^ML8es_s$8!^f=zOBUdc6iUA28b+>j|Cwe22*a- zaf2`V)jg97E0(zo9nccd=f1d@MIB>63eXO$iQ$3L(q}Fkw~XPn4=NSnLtTP2%rI%F z872)D_oQf_sXic}O+4rvODe~P9HP_r)F{0ZQVPK)QnB7^K7D3UWmq7&F0Q&a<*)|I zk2O#tijxn0rfaaN8n8e?)nG$iU0iCRY9J1lPvb;2<=Mx!M27X%07MIE*^-r94v0RR4=rFuQEYqWQ`Fw3Z$kM$?ZrYC5nOR;X5F; z0kqvBn(Fw!r-3i9;?b6awk>@i<-wwDZ$Ab{=@qQu61(n4>YRaA2?;F`@B2VPTOSMw zEhh?+0i>mna8YI-BzW&lPC&v1l?#w4{{;_W+*pS@^yL0g0id_!KE~qB^@BpgqvI0e zXGie{CWi}R;hqm9@{6BV_?%lT>iW~3beFleSZMaDL!|+g22>hQX+Wg`l?GHAP-#G= z0hI<+8c=CKrGfuX8c-Jhvum%^<~o|_t=$Omzx~Gp82@7o-?UK!wvEs$Oyl7fzd)P@ z2{!^@?2YSoxEBoL{soZm)pG_UTn|_QNgL8ONRsOT9LVEA$LD$!7UmHX59V(-V1nxq zxE?`oMDh>R<(*VZL7%*eZjqx|`OTjH97*pdreT?;S-x3DX7_(#ij=OYlX9>pm_~(o8 zoj=C;qMbz;>*IO=zTd~cID>y-1^=Q5zOTnWGK9goDJ1-VBh28}9FhejOGs9btRcY` z0qVXj{Puuk2gx3i10?v?1HMwE?|%ji#8Dmpng&L~XFvEQ6AwP-#J`?`A8@wjFe2*I zJ|X0ecEsc2nUJ&8nGB+Tq|c)NF9fOMfO`UbsuxDd5hGo&1v%JdCFYMZw4e%A#HJrz zQk{&VK7zt8bg}LD9b*I~6H=N#X;y@51N?Ce;rMAOe;$J2;DJM(3;=HU3H1#4Bnx|> 
Qb@KH57us)wjGOrX7cA+ymH+?% diff --git a/pandas/tests/io/data/excel/test_common_headers.xlsb b/pandas/tests/io/data/excel/test_common_headers.xlsb deleted file mode 100644 index 98a38ab17f5c2e7f63d75377bbd9099c9a90ce6c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8025 zcmeHMgejSmad^oN*WPCQaT0U8};5# zFZ%oa1K&OK>~o&;oU`6NXRY<_{qD6@?W#xtk^Po*8w=n ziE0#jT*3kg9E+(%E)eP5K?BWKFYXO-m6}4w@aNO_tL`rjS*mK$&WN;)%|zZh_oFd- zKAx*@zHNP#B2}Z~g3?q^gq!I7V&o@1tYqw2lkR7CzQ$u0pcha(uCzI2!v!%b4I;oFm+6y0`aUvcatlZHB;Ka$`T*#z4i= zl~rK+qY(WUmB%vXoKQENSY;6L{>P5jHNn~XAyLhkwNE`r+7*qbkX+lFb#A$^5p~pL z+Hb!NpTMS=U5jy}8*}FkpI`MT6cMN?EzozDcapgb;E2>in5HWw&F(}X;PktS^f%8Y zKQvsTfpg>m4pZHd^t-|#ZKxE@PYb??HY3&&jU91KR1USs>4_O#0aq>Q*WJUBjHZR5@q!j!38vBj7f^sn zewX%?#@=XCX}U<4#PD1CXeM6AAbp>wbv|pcs$|cak3rzugiPyd2P0viQ$Y|jv})kF zW`_hrXqLFLXJ+&hwM59BtH&bEZ`HljM5kY(s0@v+4NcYL+LnAG9L>OMbT&siNS!9) zW+La*li@bZxrA10l_o;1LCauR(5~W1`amS!Pxe{eH5#=8BkMMR4 zGlmIx0kiLhzPc24E7*Fs#GQOf_RugU25Oo_M!`r*Y=&gW>t+40ci^CiW|glifbJNr zzmPzue}f0BD~>Fha;J~-psz+_Wp{q#6<^3+T5kyr_c8H54M;4m0{uj zJ#Ubn(DrD-a$z6VuwZY%^5)l!F#`$6cdFcZXjq(vf1eYvlvF6rRBx3a0eyXuv?``oFO1od8D)|Mrd z@-Fu5a>h=sy)it}rtBlx_t7Fpe$fA29W9x4=jt9{uj+$L_dryqgvUcX$Wd$fs z6wLLGy#vvdQuy_R^^ie&cVBb8faD`S9^)@kctB-T(x(viU=!I#9LGP}QusE#iF!@~ zu4aY!rsOk41|xJnRfn)gQdP-aB&Mv*xFx}Y%_KRjOw7+ZtvO88PRgNI0y|(YC%^6VVY#nZ1h4>II*_(%u!IUQBoLTBE*tzS{7C z5KFy8o&S5Cy2KuCpN?apc0TvYc^5~09U>t&d#h?EV=1f67$eCtbgZ#E- z@tiSKaiFL|=)&BcN76pmCFwbfHPTi>K?QlSq=&U`PwAU;6^(>Cx*A>O$p8~#8%FLf z4Dt}sqJ_~gk7#LugWU234K&TxtQIJjX$X}2tHUy{lg9`d>X61nfc?-lm2hFIfKr-z zG?fs{f4^sUJ0W(9KMDZQP4P|lf6s1bORz1N?faSIo9^G%8Hym5CTL|_d20EEb*o5^ zJP@3tth`M$r(G!?{XkV#KYf!Gq>)m=9%7VnQdu5zs~*vDfRdS5hJrTfk^}iA^@a3y z(Wa)`bM;CKyV)cVLf{m%|%4w9A8u>nQ z#O?JHt`bSFW%A^$>gJBo_k>ZmBxT;25K12D!}r3=9c!%t3BB4&k;y{NOMpCoY$I#A zy8YSM_)X)+z9^9U;Y(JY!E^JfdY6 zDB0jUU#8CBLhZ;L9XhrKJa2qs>@3~RqJ>ChcfCzUc? 
zck!n_p-w#NP($iD|D{%jh_DqT8zBhm!(~SqQ4SbkAW^F)kILdnXMe!$1Ce8u%1NlHm5Sy zqu8VnZlB=%NLn37?^>9enVfRRS~rkm)hV=M%k^f;+J+ph0mrx%^`aa@1l@57+Z2toK}C&)O;8F7}Hmz&%jNX za@z4U6tCS>rj$1Kp2!3b)6U*1>0t30Y1syeu~y>7!E}GAzEbMws2)r4TcM8RY$vYH zX|Kz|Kv4(_%S*ynyHYV4m=#RrJNE=MC%poYs%2l7%_zo7OK4LHbOUQcNA^9L_KQKd zR4vMfK6?#Thm`7k7K}b3`T4YaLjEUM%UxCpDDcjA+MBhuI#G3Mb~b&lb^L_awX?^{eg0WVf=i#Kg`phZ{9}ic4_7F2DR8U`nGZjtY6BxeUJte-64(Wb1-=on$~bTy>!`>TV9#JNW1PIF*lDX|CrGJNUW$ULee!@(PBCwpF6fzOF$q|5eLub?B@kgbMZa7*Sqk;+!9u`> zqvVYYJn_!EOla`@sQ-Bova#XQU40LryRC?bpKixg>XnqQnG`aMBxoDU-Z4a*#gaoT zh|`Ysfti1DUDEQ=yQ$9|xdacv=nu(*G(~e}Jjo-9zT91@bwW@aR1n46ZTiTwNf|VV z&mwR|q7?ao!$52s^|%N)iu9;afkv6*kTW^=i%83ykB6vD2B={U1a<));Sk{EFm6a{&v;FhC?*RRD86NMk&$tC)xz zY&4Ll#dLsN6ctXH|Khfg;=&w5U2~s{o{yLTP1{oaR}zg=c%=4BO$Bq1TBO@}dH>8z zfxZAY@DflHfsaPD#l!u$ZaZ-bb4R; zc-b|N9PY1MQm0xk5@*b7psw z0;D8I?Q7(RGBvl5hwQBHI2)STViXH1lD}YSYf70ax&EUT$^y|Qx5J*0z~*zRA2aqh zJ?dl$20J_bwWRre!|~q*jm=2=W0;UfUb@KKHF7pgJ!0<;Q=2x9vECpPsd^6ON{j-_ z9h@B7exBY>Iup*ist(pzppSsqWLoR0K)Lxxs{D5crnUs=8G13Ndd-aJ{Kp;KH%6*W z3Tbq_aW!reAjmUph+9_xkrHTWmm6M~U}`@etWvTp ze<8<|bQE8I%bC4_zXMAY>xwcN*;xHsn+SSLK=Uddm48qj-a8&(h-a?5`}T!|%B8y4 zyL)E00&H`S+)j zpu5ty1h2RPxn*Uu7p|JE8Ylbly%mP*4TtNg^>)Cd-T@V2MI^EGQgHNn^(u2Dw*4CJ zf$&=e%hm7D9i0IYiCwFb-6;uH2+C4CMyUW|_qa+1YsYG*Qdy2#85x9Qx0|*=%B{H za*!AS0Kon=2aO#Z{!PJey!~;c#wps*v15enBAk$8ED-Z8s9^XPXUhm2m7BoDxmqaZ znPyc|hZVL+fe04_eD|2#KpQq+OwvGyY0C^j=k1CfA+{76yZ;5(BRI_O{8JL?4bZ&L` zpUOV7!bUEaJE9f93xh+MNrEkj@S_; z4^5-OI%}RuR=B1x7!H>X33uz5!So&<W+mVOj^%J142M8&h1W6#>O>@?fuCGMcR z1_s&c#bTQ-3v8DC^FHClXh< zHTNdY&3fKXq`PRpRrhWh-z-G^G)_nVZv3+>b<^}_rRAsT8SsyFmzw}Lh2Kwr*RaaX zUuDVvPX^uuys4vp0v5w$6%6n&{mO0f~6h+fC2lF!m^i^dN$z(=V-d(Nt=yLauXeH5gjVXy&k00aO4Kn@@;t#`A70s!1$001li!V_&#J6o`+ zEm&X0&E6EG%j9ZfO_mAsgf;{41oHgtsQHb^ZE5bdxb;sZtn$^YF<4 zCcUEI=NcMWm9|X>>AP#0@jyTv?xgKm<#yV>Ln(Y#Pm(n(hSf3B^tQl9UuC@+TfOq@ zU69uZo6NX{MYF$O3Dlgqt0!kdDYSw36dJNx4mQavpzOY%^wwBUSS|da<2Z<2q6syO67qAdxa^EOn`wU6&5cJFwa}zjL}!w 
zlpDT?ZxwaUV4ds}@hx$A$=jUiE-g{6DAtzO(p+N3U4apyiPy+pI@wMZtbyXbYZ_3L zwmBhQ2b*62;r;W6L?q-+akP!vk5BuK4@W#Tr;)1FrKM%J@t?n53fAmghy#z%cu>x3 zqAy)~Ag6voB+IRVYCf^#&dlqJ3(Hth%0|&y{H%UxENI?DJqDj$EOz6K=WJ+RbS^A{ zu8exmLv%NHB{ncOXA{WP|Lg*)c1P~R8Vv+ukB`s*g}?E%MwNx)0`iXxgoelvp6WZA zT7#IGe%$}Z&;Mdl{xtOBXj%DA7L!bM}D==Q5=ei{eq&_r6eH!&cO+up4#!FxI^(;7l!lH z<HZZ8@rhHh3iJ^s^=F7!IYa@t0*Ss_J+hk1MmMEUQzFWT zB?0Ai+!=c@Bgx(~A9Ht6g@Slx4#pF4`ani+=1M*KtjRBLpQ$RF^IDV}r8)9Fchfhq z{(LT$+=lhdgHbM}Pw6=a<_+73L@!mwrMFf!$LV0ITNgKCXHnltzh5Y6{xU?e{z(#F z!Pxk7$c!{XNb(GT0Oe}U{5wxv>>RC(?Ch+5$l0Hqfr4lnB+GyIRjec{-Nl05g8V&@ z#U4 z6UH6O`uGK`-O%|EG=kA#35lNsEEeX$-hR;`GFlP{t3fcjk0A}4=j#nE6+vKD?sKt= zhVcLn%V8ELHcQYb@c;xfgVmcpckh%BO8lzp>d2Dl%(fmIL>I2$(*4{>$Ib~5CHYa^ z0Ao9oZo^jXS$8N~L`Bjhg`5H&0bsJgJ_%B9x@gc=W@3}-@fuj< zet@oR;f!JM;@E7nzjtZ{@}~a@nMi1LF%)P3;1~`7cnWy}0-2w|Qmm?Lm&}6gnO^AZBsa3>X|}h?}zjG3m2;D)MKj^=6FOi`0%-(P^qWKQOkDHd1x&| z@mp#fys6oqH>EZe^f&}%T*mh^LmdQjAIGZ)%*pAwYDp6+nW^>LQjXs(mcEmg4_psH~ROOR-SO9=JI!4-UrXa^xtO)gty;EdQi>@aP#t4MsNBT_hE$c z`x{pPG8DvCCT8_sw(7c6U;CWE{NiX5WnRl zPj;U^c~U$}OMFb=L`wOrVpblPzZ@{=$wLwgMcIE=@icPGL$|iH};TW7NbM zOE3pd89E+qR?_q7++tzoE@^q-7U`z6G8Puh%Q>u->lHYJo5B3}ot%%j7xDcruO~QL z0<(QNDn(_P$$?ob5fcQb{&Sz59$-m$+dIa$E_~qaI(te%c$kCYg{lfLO1E% z>aeH~nhyRU-v;pr7!3$#0#3Q_xuJ&dNqA>Q7#m;+L4mP1dUUt--j~-rPOsN{<#y2wqAhw6S~4beONC3UN(zBNR-iSb`fgdPOk_`jMD|U#PTi|J8#+cMTEGMt zSz`85q{^@6u4!Bsd45Cq>HQISC!lp>7~XdyLS%&C7J4N3TURoCryE;3EVaXF4*ncC=34+E&4rHBGf)EGw zcjAD+Zq}xtA8Ty3s*TkG3;K7#S#R{aqXFj)2E^LI@kxc9o($rHBBR0Je7!KrH^V*O z@8z8Ji1=#9yn^MnCqN@hAf@qSIizai9>)?kBHj1+tg3q9QeH6GlwYQUC$!izcBoE_k6a1KKxT5)HlTa(d4XZ1^M`BZ= zb<#z}0{O`pOm2n9qLa335v3Rj_imDqkFEr8#Vd6d?P9sWj6RllvgW~!=9Wg2iN?{( z_s=6XbH>zWj*#qwORe-Om&!(DweD^4;E}f~NqLoH z8wbtmn{U^?uDN?$c#_@>3K~$phd#^)$cIU$ebh4dCwdK^8rm$O*dGw2O@A|Qe_UL! 
zX(UvC4{DhY2A&eRpGdVuIdA^%daH;mrEAMI+Q)%OC4+Qw&h<2yzR zE10yqe+RIO^>tS(8HxISF(L~n4_icx&goHgSu}zAP+9}Z@Jdrlv8~SRh-dRjL7~kv z5#hhveN@voaN$%cVaRSOV?(*_<6bitA%_QjCgdXV4&yRtBfxdKq!J|iV6Go2m?NQg zjN*MYqxyN1h-|cRtHa?EzQae;X&d*Q9@KW3-b#ux?v7ZuRYgCbKEK2 z9`cICjHt}1vTtiQvdA4XlGI2Q8P(p6(HoQ9_6p+gU z=SBq8^6H3n%o3$ZYAjJr?SY~iJ6m)!A=VQr-b*Fg?3{QJJAv47VQAEriwH!@>Rr+? znxR>Wk}b+)Ybh;f)KH1}GdxHyqLBj& zsJ?Dh194%*`uYz;NFVAc)LmF9h)Fuu z2Qa;_v2I;{UQGnr=if61C_+Nm)kYIro-U*b$o`z~_k)rQ~{lH;Viy zLS=m@E$?dA0@VZprd$iPB**oRlouoPK z=xTeo9gJ^#JmtGAUth#wZuLAn+l|3n@x0uYk0733ARSVmY4GI4K22sR=DZ=eDR?x>>WE3I7;wGKo}s?3G#6fO`V}<p8A#~?=#tiSIzw!lMK6}}QgtCEl zPomq!6)o+!Gu{x2&mJYD`i=MgdYLQFuENTHH-u@14e zJ$xcMf0WpG#z{@Xf1 zkjAJX``~uNnEG>rIN}4TVqo8}>Sc-nlxcLK)ij-BJOa@4`ZU(2{EeN1pDQkHFvd#u z?K80@DP5ke6czW>4`{NhoKGN|g3t-N8Q2|`=%zNw{cr@$H}UScM;^NNKrg$hUIYDQ zfx}i90uM_9va7nQA#_D6KYqgi1SAx^CBGn??>b_sMOxNJvB|T^Tu-5~#noD{Vm8CO zM}jM99$W@UT%bU3uo2p)_pyA*w5Ho)zEz0VqzqQ>=4p$c?W2o9({8FDy{VhW`$Dq` zJ;5^5VXMS*t)9!P%a@jk%^to3)W>H#Y4Xh#TkPbqM}x54O(HIyFwJ9O@GSWYij6c4 zmQcgbZ<06@7#3qG7&jcKg=pC3BSulQL`|1Ir-aB{s<^UKXBcM(IstLgM@#PI&RrwI zC*VIaOsyZc_1mi{9ID|r+p^=ghJ@67WpM^ z(y0LuoW<8Sx!v@8li--$z!#gSMQ5UumGn8b-{cq_ORbn1&@iXmq?*?zJwu_}Ss2s5 zu*qsR%XNlSRVgl1voj!|$WLIKy8z=0G z7d^g;RI=o?Dx~IBJlRa0RYVtE4A@sPcybB1lTw+>LMt#t7v?f3+TH<3^-#rY3gdGi z#;w}qn;>$+;KuY(3<7SWr*tZ+GVIuS-WQzqL9oT2I$S*pYxhvUDMoQlL=NmV^~nZP zovv>xl2G_IH~251`?d{Yr;nVn^IULdr~_?Z=&~_2CePGI8Zi zv_sc7l|8(Vjm1X@c1yfv@t5QJZRvwWW7rM;#dC?p4z^ypxFqXCl@EH;n#X_qzA7`# zhbO&L)dF91- zB2pmtUE7h9-pB_C6#r=$@F|Q41qEB??45a8#Y~TEL{4t4qiPZ7K zDfe?hsG3oMRg|1~zlwMPuy`>q%wCXMuyYt*k5Tntx4hk%ut(s2`8mon8&hrY3APfJ zwtO8X)AocBd`P$A{@rVh4rBuQ7wH6MiUra-A~bA7)n9WvS6vC{!%N=V4EicY(kOD*K z8eJL7O)$?pqY%cp@^Z*}lXxIBhl>;R-MqHm@FCR(ipIg?y_LNTORqgVN^(@thu7^A zCg+BQBxF1RNR|r2Mwl26hfW5T_xr+SGAT9Sz#bRAp43WM(ypkY1Iq1m+@DjpZnYeIEJ+ z!L}F7m)!x|!Z3pL#38A%@pDU_6(Vk!G?Zawq9RCadFSt#N|zgnguXWIbiVSGIVUwz ziiDL#pi*8(jLf?^Xxh~OR}gmSX(B{IKu8Gj=g@x!p`pF~|1b!dvOlhrXj!|3AI3lQ 
z9VyBZA+L=hif^&eYxRqABdBQSH*&egpDHPXilYu@hheC(0%!fa#9i;woG07wpse{bQ1Jn#1F9HPfD$>4eU&UZn$EF1yP{=mP%+XUv}VBKyV=9LK3pk!w`@&zDijnCT8}bZPqnXwUf}=@a|`my z-*vtvZbbN=K$y3ad|7ru}UX|5p}Pg5O8*>ki=841d?tzv2M^ syk`KwKh*Wt@V~EwKZmCf{S^Mkx~L!x3wgT$021Wm0}*f|(jVXc56TSxzyJUM diff --git a/pandas/tests/io/data/excel/test_common_headers.xlsx b/pandas/tests/io/data/excel/test_common_headers.xlsx deleted file mode 100644 index 6db5d1bd089f9fd1aa040967b60ae93c9caf5139..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8646 zcmeHN1y@|j)@_1oa0wnHxVu|Oa2f(Zg1d)q9D*iT;}YDRG!irrpmB$w!8H)vT>@Wc z=DjyFnfZRfd(~^*ySmriXWgpWyLQz%t*(TKLgpa}!29bC9iFP`7ZuCXi5 zS4pAq?*;bYvj8PpJL9`c&Ga*EZ0<9|Zg6FCh-?Kv;(wX<<~bU&u5xs!3k>c4tbqjy z=xx<9Bp2lD>e*$0k&wlK^mGrh3CX3n$&8F5EzaM8p#Q-!f)< zwDFd@8?Z|CiiLaU+Ll$d7x9GpOU7}(jS@`Q+Awo+oxQEKsM_(PTE& zXaHR&DYbx^KJiW(n>|?M3XW;USr}eI`P?E%rNZ~9swDihRIfY6juqzf%Et@F6a_g| zzit#DvNmGZ&y)(F4IG;WsM|qmh|72!zr5*`B+64BIBcg`xM5f!py*s8H*d_0jxkpy zeeA%tT+ov#rzR}O66jmh+xJPRRc+6V!pE_26m<>h>E%7)Io?*|NvjNQUn?#Bn~=Tb z-Yrq@-H~N+$|MXiGiEBN5~ZAkCM1Vvcoc;A z%&*z^wtsFxEONV-_I#BO3c)87rK@u<3r@Orc12}j1ihDchOTttxlNu;o~Jxh^ki~t zj%NArsW?ZeZ;4KR{7CL2?iY@ikI)GUD1wQ_Qv&q5Rdg3kFUt`oWi8-19ZMnV1)EyUl>7AB)1F;D($cgRwy8AD1c^NMG%~kuJ&{X;5&ZCB zS55ELc+7`?$@Aq|4@35upI!~$(Ljc0ryzRAhu*`!KnQj5BHW?=E|LI=grpPrh%~}Q z@(6&2;9<}CyH4DlKz621PIf=t>))C|fLjwCIH&eCY0^Hqrx(J9#TV5rCKf*)rUk>IS2wKyCf$&0}LKor?& z=wt{H&2+DfD)1RH0sijx&WAlroK!GDgG5{}TPC5<_e&-QvXH#O$8x6)W5Im3!(6UB zw%`%UemG|aYS#R(-)iob1=iHp)1)$6uHQGv&Yh!V1$t7Co{*s{h+%sIMmHyDSufk3 zo}(Q%-CC-64%@ZmU1P0Nd{Ck)Ku;LJ7ya= zJ+NHs>zQ1F&-C9Z^YE$f+&-KxF!-wb5dH+5GJhruR7>9}jf>DXtLEO{qi7llEt^s6 zlN_W*MQg4$Iv!EA@>&B=XB#?h6>}to-5xJ0-$Oc2>C|Gq6m1PLBHO0B1&mo{REQkW&1DU>8tc`UTXv*i0 zwQ4HB*3o`7A=mZ->M(<$h@q=dy4~-Fd%Txg**#Auw<=ERQ)?=$;<-3k1N6p~X3h0) zVbf(rtr78i_)4X-(5@_fid~9Lw`)T7q3t7AD)z6~B<8vSbE3Osq6pCs{UWlRgX3FW zKrewy#93;`D(X4z1^2uXl_N^9@KGmm|kC8aH64a@1?cx@CN))IyT3xN%JS| z7y_5y;0}p`z=K;?;aaC|oCZNPdukdx%36g$yppOwW%^G%Qv&4 
z&@o6bPmx<>l#WB$)){~WqhYqDv-M4Zb~JlC6TYzMfhe3S^1EqAmV{)P~GsZvM>rGZpJ&{DMX_s0}wUs7{CjaLRKb=#QGp|Lw53h7b6 zWfZmLcZYfD@v4a-owkEvTQbCI%96c~mBxgwP!EZFJWAj9`%8R5dkWsAO4ICl`d4fS z;{0yzQ07Zw{DUZi*Bd6u-e{I{iyl1~TJ7Pn)D%l^^6U*BnC*smkv})!O-qgr6UF%9 zDOPPNnXc=*&8RJe`3AE{cXg0pw{r_>We&@pAW_QxdLciKd8d+=$e8?X4H`Ldk)~nYeWrg7|Lk4C%Y_cU7@g4bq!B0iqyir%J{k zxs?sEw}@op^t1cCwX?XW;EGU7UHlwJq&~sJy@uQm$out7yKKgWVh#?w!Q6AMW*jzuPx9^tyqu3H8nHY|7ma zrUPNch7t5u!`*i`s&0l9B6Xv_VX7PB;4cecjj=RUj2g3UP#F(}!8=lJt?lp+D#0)b zqBbnLo#N8sk#Gx-FKjJ+OE9IfIn47<7K60@>H;158>z^&b@Mz5dx?(8ftH z*dHTJYO3yH2;z zeNVHGkQ!W0PYWCq5xE13o!VA)w+>1Vvzox6{hBw3d&K~?2!+h|de%V{&rvfVP0xUR z!J+yrm$R=Ape1XjlAmtC&9fmweK{8r;T}{WUG20SonczL=>BwS(h@xpEXF}N=i@B_ zrx=V5{#)Pf@S3?@sJr?$0b2y$xAf957;omI^H|l8KTzWG`&6HO7{|UXuLWoOW@@K9 z*5tM)@%X1>F%_B1h+S{pYa1E6^J|o`<$tc=!Mf-bT(Op+MFqE#3sCt*2pGf2h+WSZ zBxs_njbbDUo*5os`JGQ`wXRXnj5MydJD;Jp`|G-H5Wh7<*r+gEO4lUbl} zPF|`05vJ9Yc*MMoxnwgXYkp}j=~0kd@}*7W!q}wA_-WY+*_~(oEtk;(kuw)&QnfK? zD9$*O?WHtNgF(4=msmu{oKYU?GWHbAui}`H<7+#H2mN5qv}{>)myIkgbIWeh4JeK8uWDgeXLnwF><4a%O1yl?aqBjFQ1h~ zVMI35;M?D8APKWb5Bs59IsGG_+emO*R8awdZknGb+uxRY7i$X#3(nuJ+&@{pt2+cC z7bJv{TuWks-EaB7MKdfef3c5Wpf}G*ps20e1#0v1#5IuD4sON?ad3M)aRCJN)>viICG}fH#^(+~)J?_0xi~6y6 zdc<@x{rG!7J0T`MAGPNR5qK8pj#rp$w@Pzaz92k@KD3@`#49=bocGiU!gDOdU1-6v zpIR-20+tvfdY-q_WfAvKPzEg~6SEd*l#$ZKT^a_Os(ZiTp{3i!PE_>sE@@I*;f|HT zj$m4PS^AM7=i|4IDw)uCn;QF+w6n)1ncSjOtKMT5O!)Y|)>jNaHc09@iAS{=<D<%dfo*_h{dmZ&USw2!Rp3}5UlwE6ASC6MRpP$V@$Bs zuP1XpzdX(qUw|>#A9gh~m31BquIokY^GE>v zA861ktoC(bsg0mjeS$CvFL%x{jHBT@1>-Xse|o`;t*CWbArWv_NEcqm#gm_CU>v4H zu5s>R+S#XEcgvbRDM-VaDeirDxOEZ&6Zbpayy`HjYN%yVaztg&)ZCwpHoE`ugJjVN zc6YdaB)B#4HIjdZqC*o{*b9}rNPqOspY(p)Ya(0e_GWn1;?Biki zirE1p#3TUc<_JMDqmxUfGfoO`)2(=ap!b2HKFxN!Yvo@19EDCda+4U$Zl;m?sDVBz zr9%EA^1NAu8>zdU*0Yb_fEABQOsV)YdRgUZJ^ku}MeH!Z?q(I^mJtHb7 zE=!?a#XVn-Uf2G^^6@9+HQUsLC!yCC!;~MJB2t>ENn8ReAr`Y@bm|TCBPP598x5l`9~&o9?kYlAdxy2o(v1-;;!5qlGJ}%PSS>D&5*#Y6 zoSXwah?&Chmh!J2$t@@v2(72T@XCn7QCa4Huui=O8E2kC-sFn=3{%*NMANlO@*>{% 
zF?h}D>r~xiY_ur8*MdamV@pPJUVlD>3$zOqGYLk+z#>@)3?;gIC71C*&wf84Z8}Zh zAtIr?b~8czT4dLku&O7w?)_Bm2w)WCX#Jy`1d%i*iw(JeYV-?gHajot%=y;WaZvDA zPc5+FYe{*J(e8MvuJ%XhtYq8Lp9(0^Y4g*3cDe|c>PW6Ju0WC%HqoK? zH@e-8DIpU)Y7M3hJuj^;A>%m~cWAod`tTEjQUbvz+D1C@E{On zqd}6|mFv}(S2I4&2Y56YH?{rQx(;r(?<{M@nUI=Q^G0^Rio7!{(?7uWTBsbQ5Ibz9 z!^WANbX4CF#Ha<;>!aB+Sor4Y7NaIAiWb+C|H%G9!)j){&iux_1#n^ex$gcS59c5O zeO@qpJ81ylPLlp^zduh}U~3Bt7cl2Vm9z1 zS7Y*9ty!=!GFNSe`Ae;(W<(pE)O@WPjhMlRA48JoPWa+tlSDH?V=@BVu7v^&Bd=2+ zmIUJ!M%wYBiZv!G!z?pW%30FmA!~bSn>3T!Nq}IQoKbG}NE_LKv8K1FTP5~6ZOXJ- zLs%`Pkp=q5YzHKhiR-iH^Q9@)o3Xg1`lMA%NFmC{2H26pGK4P%vwaQ1wX>}GVaP9e zYZqz>IChBf?11Pe^Rmn(o|m6&@=bRzZhS#L-F@-xS&(RVmhm1=mdQZY5(|^U=XFu3 zoO7`HefDnDF^hXa6N)8`N|FVHmhAZ{1&}4;z?%Tiinl|kI`sxD^I>9zk{OA9S&h|K z=5=V_37obn7$NS4A|Eqo(pIW5nwCjaa$hZ@yBpvkKUF4CO}?5?%w3dRLZUdeR>soz z3&!|_09{cZn*p=0*QH&C(vpWYW{u#H37S4+exaqzOIYN0%KthP8Jg4X;Zs_-jr{`{ z%Re5|zx}yaC5+){bxpm}z(S(0!y7kk%-AI=(tQ~HWCM%u*+6bWiRfu8cSqdwKE$^1 zXETi!3kD#Wduq)1P#dSci?T#`1l7kLY&B z^~189rATHq=ikaGg1xpJzZ~|&+`_5&_mqGT@MNb3PEaX0jR}7zD%jctWMQu90{xkI*=#ExB^#U2FF#c{C{S;s$IxEFnl2Nj`7x@XV`qpW2qa%5n%IyA(~Cxq$vm3Jgc4mUnAzm~{%za~E2gZ9Y%- z6Z$9*H^9=t3N6!uu7~K|7z~&Ra@-hFJYDbRiLI**g9Vu?qZC!`7;>agE3JWshHJ_M zg;-)_1#LnI0*CZA=;O$eIX7kKx)zTCT-mLOw zb9KP9Gj5+dBv?6B=1pl%%tAS7HqS-`7vco7W;qgXe`!B*v=##?5;-Pg55cqgp;W#4 zV@L(k;arE2?1uckhLODoj>mFdT+XzDXW#z*l+uer=hU}HuF3Q}=JJ-z4XH3!Vc$Xb z(GbY@I5#WRLbIGI`cP=>?vz^H=){cMY$P?Op+s)+ zK28R!m$G?KMJEUt{-7DX_UuMc?0||x5)w%2gU#Uq?Zi`<--4*w#>;ee5|{K6XR?g) z^du&j>+SjzH|&9n&`S`9ZDt2X_`WW`;c}w%djHbGll7=|(+Oh za$eE`x217eOXnsOVYp&K?Q6bN&dMr!#2G~~dkYno2*UeLXvN0~#h>8Cmf&NS5o}|Hmy59d?_%pBx{;vOj_2bt#zb3)|h;#t|Q@}4N@n6BerV0N5V_^LT z{v&DlEA-cl%pcHMoPW&E{2JibJ>?$(bcz4-;(yy={uj*8sV>I w`Uf5W*n~&;o3;K5|LaQlXSglJpWy#m7uA)J;j;?>V8B2AaQEh-{`u|y0027e^#A|> diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 55dcf8580a32f..9f5e7a7c6f4c0 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -576,8 
+576,11 @@ def test_reader_dtype_str(self, read_ext, dtype, expected): def test_dtype_mangle_dup_cols(self, read_ext, dtypes, exp_value): # GH#35211 basename = "df_mangle_dup_col_dtypes" - result = pd.read_excel(basename + read_ext, dtype={"a": str, **dtypes}) + dtype_dict = {"a": str, **dtypes} + dtype_dict_copy = dtype_dict.copy() + result = pd.read_excel(basename + read_ext, dtype=dtype_dict) expected = DataFrame({"a": ["1"], "a.1": [exp_value]}) + assert dtype_dict == dtype_dict_copy, "dtype dict changed" # GH 42462 tm.assert_frame_equal(result, expected) def test_reader_spaces(self, read_ext): @@ -1278,24 +1281,6 @@ def test_ignore_chartsheets_by_int(self, request, read_ext): ): pd.read_excel("chartsheet" + read_ext, sheet_name=1) - def test_dtype_dict_unchanged_with_duplicate_columns(self, read_ext): - # GH 42462 - - filename = "test_common_headers" + read_ext - dtype_dict = {"a": str, "b": str, "c": str} - dtype_dict_copy = dtype_dict.copy() - result = pd.read_excel(filename, dtype=dtype_dict) - expected = DataFrame( - { - "a": ["1", "2", "3"], - "a.1": ["1", "2", "3"], - "b": ["b1", "b2", "b3"], - "c": ["c1", "c2", "c3"], - } - ) - assert dtype_dict == dtype_dict_copy, "dtype dict changed" - tm.assert_frame_equal(result, expected) - class TestExcelFileRead: @pytest.fixture(autouse=True) diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index bc20f1d1eea5f..092a337e7d7cd 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -245,8 +245,11 @@ def test_dtype_mangle_dup_cols(all_parsers, dtypes, exp_value): # GH#35211 parser = all_parsers data = """a,a\n1,1""" - result = parser.read_csv(StringIO(data), dtype={"a": str, **dtypes}) + dtype_dict = {"a": str, **dtypes} + dtype_dict_copy = dtype_dict.copy() + result = parser.read_csv(StringIO(data), dtype=dtype_dict) expected = DataFrame({"a": ["1"], "a.1": [exp_value]}) + assert 
dtype_dict == dtype_dict_copy, "dtype dict changed" # GH 42462 tm.assert_frame_equal(result, expected) From b63aef2979cc3060a0966bcf828ebc7f35266a35 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 17 Jul 2021 22:28:45 +0530 Subject: [PATCH 07/15] checked for str --- pandas/io/parsers/base_parser.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 6488c60609dc9..c9990254f5d0f 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -532,7 +532,11 @@ def _convert_to_ndarrays( conv_f = None if converters is None else converters.get(c, None) if isinstance(dtypes, dict): cast_type = dtypes.get(c, None) - if cast_type is None and c.split(".")[-1].isnumeric: + if ( + cast_type is None + and isinstance(c, str) + and c.split(".")[-1].isnumeric() + ): orig_c = ".".join(c.split(".")[:-1]) cast_type = dtypes.get(orig_c, None) else: From f0f3022d62e47628085f5193c62b0fdad4add7ba Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sun, 18 Jul 2021 14:26:15 +0530 Subject: [PATCH 08/15] removed updation of dtype dict from c parser --- pandas/_libs/parsers.pyx | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index e5e61e409c320..f1eabc8548345 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -108,7 +108,6 @@ from pandas.core.dtypes.common import ( is_object_dtype, ) from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas.core.dtypes.inference import is_dict_like cdef: float64_t INF = np.inf @@ -688,13 +687,6 @@ cdef class TextReader: counts[name] = count + 1 name = f'{name}.{count}' count = counts.get(name, 0) - if ( - self.dtype is not None - and is_dict_like(self.dtype) - and self.dtype.get(old_name) is not None - and self.dtype.get(name) is None - ): - self.dtype.update({name: self.dtype.get(old_name)}) if old_name == '': unnamed_cols.add(name) From 
cf2728032dca746c33c8fe11cc6cee463727ff53 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sun, 18 Jul 2021 17:44:06 +0530 Subject: [PATCH 09/15] included dtype conversion of mangled cols in c parser --- pandas/_libs/parsers.pyx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index f1eabc8548345..2fb5af28e4610 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -981,6 +981,10 @@ cdef class TextReader: col_dtype = self.dtype[name] elif i in self.dtype: col_dtype = self.dtype[i] + else: + if isinstance(name, str) and name.split(".")[-1].isnumeric(): + orig_name = ".".join(name.split(".")[:-1]) + col_dtype = self.dtype.get(orig_name, None) else: if self.dtype.names: # structured array From 0f78c9fad2155db98f1a842ce56e4b3fb3c52d32 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Fri, 30 Jul 2021 21:23:41 +0530 Subject: [PATCH 10/15] Revert "removed updation of dtype dict from c parser" This reverts commit f0f3022d62e47628085f5193c62b0fdad4add7ba. 
--- pandas/_libs/parsers.pyx | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 0f3a407bd04fb..5a6f5e2a7d86d 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -108,6 +108,7 @@ from pandas.core.dtypes.common import ( is_object_dtype, ) from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.inference import is_dict_like cdef: float64_t INF = np.inf @@ -687,6 +688,13 @@ cdef class TextReader: counts[name] = count + 1 name = f'{name}.{count}' count = counts.get(name, 0) + if ( + self.dtype is not None + and is_dict_like(self.dtype) + and self.dtype.get(old_name) is not None + and self.dtype.get(name) is None + ): + self.dtype.update({name: self.dtype.get(old_name)}) if old_name == '': unnamed_cols.add(name) From cb369bbe95a6ccaf73854d11e24aaa1ab1537e51 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Fri, 30 Jul 2021 21:24:15 +0530 Subject: [PATCH 11/15] Revert "included dtype conversion of mangled cols in c parser" This reverts commit cf2728032dca746c33c8fe11cc6cee463727ff53. --- pandas/_libs/parsers.pyx | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 5a6f5e2a7d86d..3655d6efad66e 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -989,10 +989,6 @@ cdef class TextReader: col_dtype = self.dtype[name] elif i in self.dtype: col_dtype = self.dtype[i] - else: - if isinstance(name, str) and name.split(".")[-1].isnumeric(): - orig_name = ".".join(name.split(".")[:-1]) - col_dtype = self.dtype.get(orig_name, None) else: if self.dtype.names: # structured array From 7bcb504dcf539f9f0c64ecb00a48657434ccf620 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Fri, 30 Jul 2021 21:25:30 +0530 Subject: [PATCH 12/15] Revert "checked for str" This reverts commit b63aef2979cc3060a0966bcf828ebc7f35266a35. 
--- pandas/io/parsers/base_parser.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 16e00b1587be3..f30b54d9dc0dc 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -532,11 +532,7 @@ def _convert_to_ndarrays( conv_f = None if converters is None else converters.get(c, None) if isinstance(dtypes, dict): cast_type = dtypes.get(c, None) - if ( - cast_type is None - and isinstance(c, str) - and c.split(".")[-1].isnumeric() - ): + if cast_type is None and c.split(".")[-1].isnumeric: orig_c = ".".join(c.split(".")[:-1]) cast_type = dtypes.get(orig_c, None) else: From ffb58521092507581af4054ff680c92511f95693 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 31 Jul 2021 11:55:47 +0530 Subject: [PATCH 13/15] changed refernce to issue in test --- pandas/tests/io/excel/test_readers.py | 3 ++- pandas/tests/io/parser/dtypes/test_dtypes_basic.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 9f5e7a7c6f4c0..f999733192725 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -578,9 +578,10 @@ def test_dtype_mangle_dup_cols(self, read_ext, dtypes, exp_value): basename = "df_mangle_dup_col_dtypes" dtype_dict = {"a": str, **dtypes} dtype_dict_copy = dtype_dict.copy() + # GH#42462 result = pd.read_excel(basename + read_ext, dtype=dtype_dict) expected = DataFrame({"a": ["1"], "a.1": [exp_value]}) - assert dtype_dict == dtype_dict_copy, "dtype dict changed" # GH 42462 + assert dtype_dict == dtype_dict_copy, "dtype dict changed" tm.assert_frame_equal(result, expected) def test_reader_spaces(self, read_ext): diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index 668d4ecc78ff8..32a7ac44c0b38 100644 --- 
a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -246,10 +246,11 @@ def test_dtype_mangle_dup_cols(all_parsers, dtypes, exp_value): parser = all_parsers data = """a,a\n1,1""" dtype_dict = {"a": str, **dtypes} + # GH#42462 dtype_dict_copy = dtype_dict.copy() result = parser.read_csv(StringIO(data), dtype=dtype_dict) expected = DataFrame({"a": ["1"], "a.1": [exp_value]}) - assert dtype_dict == dtype_dict_copy, "dtype dict changed" # GH 42462 + assert dtype_dict == dtype_dict_copy, "dtype dict changed" tm.assert_frame_equal(result, expected) From 72d50f42b0f633bd78bb0827e1b67b811d9d76f7 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 31 Jul 2021 12:04:21 +0530 Subject: [PATCH 14/15] reverted changes --- pandas/io/parsers/base_parser.py | 3 --- pandas/io/parsers/python_parser.py | 13 +++++++++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index f30b54d9dc0dc..5714bbab016c8 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -532,9 +532,6 @@ def _convert_to_ndarrays( conv_f = None if converters is None else converters.get(c, None) if isinstance(dtypes, dict): cast_type = dtypes.get(c, None) - if cast_type is None and c.split(".")[-1].isnumeric: - orig_c = ".".join(c.split(".")[:-1]) - cast_type = dtypes.get(orig_c, None) else: # single dtype or None cast_type = dtypes diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 35120c7a791e2..af253fc062632 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -4,6 +4,7 @@ abc, defaultdict, ) +from copy import copy import csv from io import StringIO import re @@ -25,6 +26,7 @@ ) from pandas.core.dtypes.common import is_integer +from pandas.core.dtypes.inference import is_dict_like from pandas.io.parsers.base_parser import ( ParserBase, @@ -80,7 +82,7 @@ def 
__init__(self, f: FilePathOrBuffer | list, **kwds): self.verbose = kwds["verbose"] self.converters = kwds["converters"] - self.dtype = kwds["dtype"] + self.dtype = copy(kwds["dtype"]) self.thousands = kwds["thousands"] self.decimal = kwds["decimal"] @@ -416,6 +418,7 @@ def _infer_columns(self): counts: DefaultDict = defaultdict(int) for i, col in enumerate(this_columns): + old_col = col cur_count = counts[col] if cur_count > 0: @@ -423,7 +426,13 @@ def _infer_columns(self): counts[col] = cur_count + 1 col = f"{col}.{cur_count}" cur_count = counts[col] - + if ( + self.dtype is not None + and is_dict_like(self.dtype) + and self.dtype.get(old_col) is not None + and self.dtype.get(col) is None + ): + self.dtype.update({col: self.dtype.get(old_col)}) this_columns[i] = col counts[col] = cur_count + 1 elif have_mi_columns: From 57c65e5b0b39b34e26c2bdcbb58d6c51fbeb2632 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Wed, 4 Aug 2021 18:50:51 +0530 Subject: [PATCH 15/15] added whatsnew 1.3.2 --- doc/source/whatsnew/v1.3.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index b136d6207681b..465e5af38a06d 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -23,7 +23,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Bug in :meth:`pandas.read_excel` modifying the dtypes dictionary when reading a file with duplicate columns (:issue:`42462`) - .. ---------------------------------------------------------------------------