From 6a90666c54592cbd0c396410ba0f9d69bb92e677 Mon Sep 17 00:00:00 2001 From: wz337 Date: Fri, 8 Dec 2023 13:53:06 -0800 Subject: [PATCH 01/10] add device mesh recipe --- _static/img/distributed/device_mesh.png | Bin 0 -> 38461 bytes recipes_source/distributed_device_mesh.rst | 135 +++++++++++++++++++++ recipes_source/recipes_index.rst | 7 ++ 3 files changed, 142 insertions(+) create mode 100644 _static/img/distributed/device_mesh.png create mode 100644 recipes_source/distributed_device_mesh.rst diff --git a/_static/img/distributed/device_mesh.png b/_static/img/distributed/device_mesh.png new file mode 100644 index 0000000000000000000000000000000000000000..2ccabcc482431d1be0c316c933ba31db2b3e29fd GIT binary patch literal 38461 zcmeGDg;!k7vIh*~5S$R)Avg>YU~qSr;0{42xDD>^!6CT2y9aj-?(XjT@|=6lxp#fv zU+}K=&gx#frn{PVXnJY@8VN7sUYJtUHI`W2G=`gncU zQ(!B2kZ2}F_XjN4D2jG%nPTmHl?{8ZwF_{!cZKeEwFVJ9hr-hgiV2o+#<8{oayQ04dpw-(}7_5wn*+`+8K$SbXeT(&;AkVp`j%cgFUE9)r|x zN3}uT>uFy9ea(3MuR~bWMyz`&j9P>A@`wowm|zB}my>{)3)k_DV8*olx;%qUSdxNX zz-M@=qy0~9t2kF~Qf>-T&oZ4Ji9c-IghQh9T<(Nlc_EF-Otk{?H~x0qyI zl6F2*!Arlm4y}E5*AlP#VZ&`thI0oeJ{{xezN!0mLJ;u10OuzG4Fo3*`DBd&lJJ?c zKin_-0lzVbLzxOBp9YL5jd&I_kqHlGAA}!1yo!+cg7COdDtX*I8!#(A)EBUjlKHU) zSQ|btF;JqUm}T(Vg2XfstzG3WYE^(U!VYCt$MT3fB5c57A3Q@&ii3aPCDh!e!`jKbj(hz?ZG&1L_!=m#E%Tg}-T;l5$G%jQ{ z=~M*Yf`%frJxOqY8Q~{Y#ao7Y+_T3_WZLCri_rEtDtNwY(H77K4e%}9z_g`Ufph8h zydZsn$`FN&|00c%AIK^;r6l_uf({WY_;WT#&XrPM5#4v964<|?E&lkT?V_Hd##7(f zP^`k7g`cxTrU-BFZ^RwRTE2K9uZElnt_z`MQ%~x%{D{EshuID>G$d!fOqP`;F2Hs8 zGULFXjnx2{k4BF1QKT}S)79kg-gw|>140i zH6Pm>e<2jh+>+W!(Idyh_eJW_`tcKN zm|vuDvT&ks${*N3A+h+J4ssb5O}oM^i$1SD1?eMcWXg|B-d&Gf zw_W~S1v)k?g@msOt8{X7(W+A={Usx+-&E0x^^`oDCB0&2$j5VD#0qiv6#f53{Pmli zD<)EFE#@e$$UZ9vd>6>_(tOPi$_Yvn&*|5T49DM2{Vs#M!ax}2Z zGXuVLm_Vg1q+H0Mrrt30GdOE7Xz0{~yW)QgGmkNsvlFqZuw4U3SU{}qCKVuJlaA%K znU~VQamo)<~4Zm)bVh1bYeu{TDU&HN6Tdj`)sS(_9CO%(_P1K5*KU_x{B zCaR01-GyD>kJq#9%`l36@i6g?sj-UrVR_rdX);OwC6t;sv)l&XWO)T(0J=!=|- 
zZiMR)6b#+ilGvBQJsO?w8naVHLs%K~cjdUD4UyyFXZjXZtf1Xn)soU*>be??wW;k- zvdC0aynEnLGiQj_Fh0DIFvjTg4Q8%qE~X~ymG%|8jBk}8K>U3|^)Eo|bHYw)FWzqU zY|c%}%ps=TPWc|7pPl<#XI+=b7p;BDbSqQ&ih>G!xHH-O#F%2_pZ6mi+ zJwa5WUOBlIic790$c(*pfG_ku)$_}F}~H}NA3Ndv37{|uZEXY<5-pWYirQMT%ykH z%2;#Hx~sQ(8g)ALFu}rJUHO*#&Hc-HYooS}K-DXVANVfrD<)*uy)P`k8A-z6P4l?9 z9Mwd$%5&;WycD=fx$w7h+3CKTygBD2XOmCnp6x9EbZq_Edi}_AKHMsd{toj#{2ndw zA&HG-lCSExYkxXmx`Xw;=E=VP>U38Hz8T&ft(mB#RI!q#=;4OqeZW0oEg@3%NwqJy zI`|u^0MBRVyld(w1$GJ5Xl%k)WyT_6UTnv*-2RS&y{4th^m^O!Cg&H?o6u!yK6#G$ zlsVNo`Z=86P=&pP8SjMG4(}G7GpA+;W=G>k>5Mj696LbaNDu*a>gQCrG>SC$@y861 zjh&^%L*Gf#(Y4n82!KXs!8I1la=+CmvzSDs9azq^ytN#(tllWlMrikFJ#ZCuM(?Fw zp?lln)*LtGdYF1$e?3oBnb|7TIW=oC{}()SXWCQQ(8=Mr>;zn^>HO=lyy+}6@hc;| zy`tU9h4)OSeWYdiZ(~_yp+MdHn&3^>2H#nG_4ClZzQ7=u`PKHCVl?+d|){i}MF( zhUxk(<~jEzVY9T>yXDo@f!xSwr~cr!5g2Hrx~A4y>pIVm$N9M9IsANB{DMYD66Kla zee&|<&BBrU`rDbq5Kb-=qyB3L|5M`CX<7`U(L@v!jp$|*C&b!!Xu&8LA2$Mst*~ZI zH?^$hNAJWg@2zH6ceN9UA_r*FEG&r1F^KOJCDBC;MrnA#e%g>ftr({sn_Zm6GcmkX zAOfVYWeZRqfULmI_xv+&J>RcK>vP@^nNd(b`7lF&eC$}&Of;lS<>VmfK4=68$Y2Wy zm=6l_qvC&52ngt3K@jjC*UukSGz;oKy-=Vm=>O1=s{b$wsfb8PeOy(H?M+Oq9n5VU z%QPdLKUB?GeAjT)kdx&$wy^>j{;)AJ0f4P+{}F-U1#^FpRwj;yq+qL`)(+fYKJtHY zaDUMMG&7Nt{)@#C#7C|nr${PdV{byr0bm9&lk+2zl9KY;|1jlN78U=u_{TRsa&t#V zTW%&M7Z(?R%Qt|Hy%`e=7Z(>3GbA4JsELEIy@jo# zg^e}oKY9&~Y@8hV$jSd1=)a%;xla?Y#s7|E?eOnueN2$)pB^R_05j8nb$^KR{?p2> zXaP3)sVQn<^g|9 zkdFNS`(*wt{C_+DEy&CC&&>aiCH}|E|7!iPGe06P(|?~CKjOlJmplZ7AcT~t(04H8 zNjkj2m%jS}lJ@(x(l0_n7+7MEVqe^QqWoZ6jiA4ahMh8nglTkzg=sv}LivTUDZwbY zC9S!mrN2MRCmUNkxHRu@3{gJvqqwy4@bEmRv@f1_xPD99PEMVa^nv+r+4?0o-o$`~ zBnI_W@IS=>L9y&om=yB!zl$#?m0&gYKtfMWEX1dnf0sV|%KtJzehx8)`U0}k;bOr5 zFTImbCV5>`r#Tw=nF0fMTFdcc=TD5 zJyw4&&#(L%LdGH-w?7DaakWR}|K@)x;PE{JzgBtg=K{3}(tfP+V*Ga*dC-+(^`G@NNf5BE0KR-8O1u_HF+p&5q6Dm1il1wqa4he1i788SmKqD~ z7i#iucll1nW!hTRqHc~euCG0&HlYO7cP~xsSNlh+zU1mOzts6b>e^RD=HdNokDy$S zZ!RFk=}ey4B#-8AdR10y?izFF{N}?Ad}LNOh@6t|w8uMzeLQ{|c|SIP#f6Q`Fw&*y 
zPPd*+C4QT;_vCR&!?5He4j=%pE)_Y}1uJk6)zy?cd|h*3_8|E6(-be6RBaqjXSSAL{Qn_ltY+ZYm@c7(4*HIhf0UPMARVlyLBQ+9#H&;vBh0xB}0RX zMx!OHeh1uD-Xd?)`;b;`U5Wg=Md-{@nFEmE{`kk$U%s%-ZGP+TWb#{+ZL3bHK*Ckh%@y(McJa$Yrd))Wx|a#iGbZ!+1M%Q zYn{@`d{HCa1v+Vhq|uPlGhy4@Zz%!Hff zAZFo#?%^6<1T%VP!kB6q<@H@0 z!1`$0n{Vx~%)rbD6&Ebq&nBvJGTjk$a!M|{pZ61^nPS(wti%_i9;ggFpJD_ZVJ-xN z1hk$$4>DH=T@+3q2|JZLX7k!hH~ZPEzW!p>dG1@EvYr$f@Ncd5a84js()}voNOFR0 z^Ln85Rp>%sWssI#VEj-fvb43t>l1B;KyoYN$ny*k-&!LGSIkw#5@B%QaCbEJ3Ky;_ zloEgnO~G76zO>M0K8_*`#fBRsF9l}xfLBiH<5ZlmnBqT&6{Nqhvw9!np18>=?&`W0?U z2UtBTaD;wpC=Hh+MrXsYJSk3A#+ZM*#aEE=D`SR5e;ItvvgO2eqsxOTjTHU2!83Tb zHYE)@|IvAEuFA;j@ecYb-TEDLOT1Wf9%Jq)ErsPTM~{`5#=g+(c8S@2P;^1OtwyjJ zFiNJEVU>alL(I8w$t1wEO4iA+il^~$1o5*?_)nYDTJRw{>sj$%Ml!#jkm~2X z>WtKC_)ottI}1V&L`(MNxaMa>(fs`0k)>1ohZTS7{@9zx2wrCjLvW-iY%Hl3GuVojzzvNA?7eIFkW+Iwneb(~8SbdD~D zC%ipi2ag1guQ4FqaCg^-fbz2Wf3N1;2CN@}IxCS*t|V&_G5VOS9dT!&Q3shC^=d;% z{&WSIJ}Kgilb&qw5$ESFs+p(x+HXK+YhAY4i?#pa9aN!w??a66m->l-_UTkn34*S&k2 z+kQz5oMwfij?in<-LrVjM#U(NXs8Z*^O{iU2Oc?LjQJ;V?6b;N3e>41)iZMCY%9=V zj~R<2>(6KXSP1s()4nTJF(@q-&_P1yz#R&^D|Edj7n+}vjC>S}Mnm4MYXF1%Qn0!J z+nASncZPucd6cU+)pH;c2Q|!+N<>*dAH45NcH8|~_0)JzweLFY5_gE=3UsRZ0%`@y zY%6=+Jc7G#`K%rms7Z}^#uwChaDG&W7+Oozv_G5mQ|QpvZF&u#(4}qdTKuR!l+`ed zzHZlc(%_50cpqc9I|K0Dp#+=7VyYv09`vdoX`&!m5b1I~EWN(1Rt9-x6)AH(SaV9( zfl9FFUAan;R^QE#OUyZsPJf-Jr_-d=8Wipn!7&QnGB}AMmecanHes^Kki^C9va6mo@C}_Xl1<;bqF`z4SIXfI8b8S+~@t6n>VS zcgk=UeQSplVqNWStyqBhSHrwbDYO&le!%OY$9#FH<@_4wxB0_Q{QGMpt33E(iAnZe zeTMV~{X}z$sUA$4<=1)D0u0fo^F6CL8|#YgV>V1{u3yg?(>2R4AKrSFxnd?g61bl7 ztlf2cT0ite5TIdONr!B@*ko4tQVd*t9%sM-YjkzsYUJ@N+i;jZR_Ucp9QP!8Hg!qA z5MQRbnr_n_mKkBJxmIq%O+PoX7x39OcIWyN0DHIEBA=mhT~^te^wdvf+p<*wiSx)_ zf8?g{D;_l%*>_7zOzal7`w6AX#{OR0lXQ&V)Tw*b1E*L8HN@wuOGgh3T}X6zkW`B{ z(5&u{lhhQZ*IEzB81z!B)GicHm+;fYO{*G^IL~pIN>Y1|!!RPD*(17n)Nun~-$+@l z)v6bYEi@@u74HFNA90iMK=-^MownJUG^6FUO}XtF%UVV4p~n+pYqu}FB$;)QbwPm_ z+0gs`VF7Lq{NT6XA*H#{GSE}gcp@21zu5X_6zS5im!VR$Mt}0|9XFTqRQtSvIS)Jw z#T~t!V+VTk-gxp!oU_OeewvoB+-`p~N!X3QAIRTM$xX`(y^asLvyJ5_ 
zsmP8!eY!63QS9Nr7-pL*gjgQ)E8MqUV%{h)DpwsdU|RToJLrmnGC=rRt<6)ZHC43Q z#HqHgMDG`X50m4$RpW}PB;+qJuk0>wO?>9^!r2WSl(rQ7O&;M}nLltEQJHsfZI0$J z?(_7Al2rQHjqKj)i{2mZWa$1O1oU1-a~DK8Lt(bys|`a7mDZ+NH6g2Z3-o=PgWvp{ z$O@WjMcq7Gk!U?L$$U&ZkP}218|B&A!3N_9G8T(gcq;$nHkKn;wE8Px{YiqOP!8L}#QHz@J15RUIgz+|XvFUI6}mmu`cni5#4BZ+-AH!UK^2`q>q$;@6C&E3!KrJseQeP|NRAw?t2Xc6 zE7I(sh9v@Qf7fU7*SFYrHmlY$MG@kLvBrTQ!9a5dlnljDEYm2CIN2d0dtG5uhi=cKM=4s0o(RwV zC{yW&v4R`AE~HRb%Bv4WiE5*L^GEasOLmNS;y$UQqMI!#`pbb36|PTSadS8k@C z{nWoKo!()h*{c<0GYr*VI(L>m;LT^SLkz^_f*Re4Er%{-W&cxuTo!_ z^RZesr@*5;x3k%mx6L)A4xuX06nXHf_K<39q&l72SUs#z?Z^&Q%(&V(4Pfp;`@ELA9EE0=S<6CJq$Hf4o&&Xv z-;}K+G_R2-Wz&twi?I@Y_Jhzpz?mzTtRX093{jQMBgDPVVX}L9UbfOdqAEU-$Rr}r zwQfH-)t)lxKAdh=rxMs=%$#CMR*v?9-H}>LDdw=49YOuEndCEzk1I$(N=dWUzYSZo zg+*XrVW?0+t6|siKHEt7I775e`-Vf<;?Ms`eVKjO(+Nxg!Cls7BNGd~sBOx13ff#G z%u?vH$*Ox7y^55F=X;djEH$EeT;!0JbuIV(C^YG+R+y`xU*>^KEVdF6Nta+dMo+ZM z)q}>(Qjvunx?ZyWt)oVA@#TmCizpc%dew(H#C~*jpO3_yuuk8kzHm3zbeCPUAS8?~ z_7HS89F92;vxFN0sJkyDwlOZ7lpsbI6|pHI`Wj}-Bp5RC7eC1N0$YQ+UGD^DHqx`+22R1TQ&1FA^<5LQT^} z*m>X(Z&ADt+a{Ppe2&VJq@mrX8nv3qItJMCgpOls)gTWEWeC#~NrY3%&btU+9XOkz z0VK~vyBD7DZ}LyQM-!B?zdR5NGzxc>1~nlddv^#G@;*GYEES9lyj>V^QMMflmcZ*%17w@eZQ_!3Wx-_vF_pp+`U-SKS%J^=qtLgx?OlNrkOo~D1}82)&x z!P>?MdPbB>Wf|?ei(UO!1!ZS}=f{AK?yT~DJRvX{ljCNKojJWKa?_okGnql6Q%F~( zbqVIp&S8N}^f@i#N1Ru&=ci><&ugn4=@!4#g+-;MlD(Ei@*&>;B z%A8Y`eJQy`<@In5%zB{eCDrU<@(Pe=H$XZ4!}wISwN!p2*Pw@-r!cRfhMy+~RD%iRi!k;~&^C{b=QzbEtU?cxsd2b2lF~xBzSh-T2N>F6nvi=m8yy zJgUYSsWSjF2QxJzMC+>+rjGSKv}GCmVp{$it^3a=S9Sb&T>LcB9?NK6mD&j`!S$f#xloC^G2LA9aUCYWflkY|X6z7@EdqEGLGXhLY{J_+?26jAwePd;1pcJ1+`dBi#9S&K_t(iAcNK5hF$tn960W zBey9L6-huVsKGgjH(9gRI4(u?g`hS3#Z5 z(C#r7C|f*~JeqKTg#6bC+H!k_Mr-t8$9hUr!0{NqcyQ}oEPvJp(=S(J z;^S__hP98&aL^coJu&`dV2D!P-I8b{R~yeW7=Tvh<1)5ny8jf!Dg2y*VXC=oY#iW3(*rGnRZif3x_N83IeLCavtq~E z^MsJ(){jFXpnN}smchuzF^9T9zgQ8nHA9pW0f&CR($upVGqh_@o&$$gu6#7lzUx%U8z3V?NKe301tT@qYX z>-_&iC=lsdcR3^z82D=`0a?V6>FU4bAMptHBxFp*(L=d6_u>g4r&C*Z3If(Z|leGo!HF0WBPZpvw-YJZQ5ssk 
z^Ulv~5D(EYVHLjH@2x5Rk|7vz78nk`gY6UWTw_Dt>4Gl09X1eDC6JgUOLsq+ zZO9&^rVOI>T)|Zv9SE7fS5>nKzpZlq#Db+F9pzg`kWL%7!m0MN$Dfw>{-K8r-Q0ch zPC@mYpE$xGyKFwgQ%hs6Zkv= z`As-QJGe>zj|g74xlFU8G=u7)aQD61az0_n=y7_Bo%@Jz{%(sXRJY@H0HPPY{^$}R znb%qiZB8Cyng6Wt+4dn|4vODLl}|!AyvADM!n4K8B#s+~EFq z4Blt}PHXwr>v!B3{mOnhe%js|p>~OZ^kE#L`1ayLTl3PXvKB#tqC)ZJun)I9XFT}r zz^`wR^2PAW41L^Os;h4mQ-(bY3A(tIiA#8btN2A}?xp_&3gz0HXh%w`oNaqLxB~l| zvSUmXge-lJ@4*^7tMCk$B?M$fUGo zZ77y=PYcdA*(o=Jsa+)BtM^`^SRlDV=;n)AXrnjlGQ)y84%SmECoh+#Vj0&Q_F~Y^uyp5b5lR zr1?$&fNE5IpB?Oek+snt)qeP*cUS7c2>OCKif4LC;F%lFgE%cOV%WI-jcm;!EE886F zeeLhxUGzLHbU8wn?cVH1UviA)-`r`MOj$$htaW+kzC;u`34d~Px?0NmIq4uKlOY(? zOmE9l|L$yYMLGMW3HKcJk`4!k-dHq0JsHan&6OM##pqLR2(vQWB{eSZMN~g6xOGbr zB0}p?aCJ2#-zRuHJ$eYsUI|MLGaJ&aK#Vdyo5zmBVrN-oKh8G^QUDRzz6*gF*EDzF zw=guI((vy3__yqy#lu9O7B#L3Tm>`2H09F~mLr+5C9EuOs5NY_Zr3i~#?_H;*3Z#e z`oLES1fWhPotp;7mqr+AT}z$gX+n&>MlEUb-am(6Ek zVjxHFl0ixO4ckXhOQT$pZgWQC&=Dv{5G;bG%DB@KuLH8n>;SXTXpne)l2tHfG8yJ+ zxi8Se30jULZxa_=kz+mo?uW}RLudOpvNVw+qfKyd;ISAayJPA}tT1sO{kA};?1t`R zNyoIW7>E2+=v%KiI%&Efq$L@AMR5b0VFTVfo7QMjw27^9brTpV{RUxGK*Z*A$mIN$ zFxHat>Zr{jbhVbog|mkbb|!*!95v`!mzv1XyCws(m!}0xW?%@eg%G!LE2Ok2ayU24 zYl~d$Ki+1({5F@119$p#$_+HK4+6#-*rIwVFt4n8%~zPjo1Ay|O6{$73mgQAzfS(5 zjxjW1e9Q@Lo>Y7Uqv><@v>AY*s&MEsl$l8q=4~;y$Pp>$HiQSF@hAce8hmSy+g-(K zJzDw=HrS~zwz?dG(jWi^?ikCwldvJKKUSz^1;&T=GSxCeHhW5cXYkZ;GlyZW#VQj2 zN{2@!u(RYdMLUK`B)H)#HZQvT8tLkHvCqEJ#R8wCA4F|*)cV&w$pH0{)JWZ$k!r84#+Uy5zqhwG^iVTM23w{tt|-7uNnP=^ z8!v`uReoL_t!mR{mUyOV5i_?&VAJ7ye$J>HbNd$2a`kvR8Y7|cO~Xbv$!SOW$&y?+ z2+iS2D5MJ=?arf=PTU}yQSdizKoWh&$(wl^roOxB{8z7rso-Y}w`jXoz$W8L3zO1L zzGD}Hv~#dC&L1S$+H+ZUq$jlD6*-c(%NT-j=I(1UELCSCvk;z9q9gW|Bw@KY=qazb zAUw|s@-h5j!`8A*^N{{YlrSkjlBs!yC3j!Xz$_Q<-YBJpi zT4lM(+TtfPmD=SWG=ZA}H8!o}2#jAJtT8LJHspRTL#r3TYxs_tjRa?@D#nI+k|aFdQ6;##M62#S~oEEMaMYunGA-U7`X$i}UVsvu6nQ_|IVA+k>E>g6J9Fa*-QPBA6fY*S^ zhW8Sz-(vx@Vz%En^M@01M8|vNgu|>tO+`)0Nv$2GaA5BpYL1&n1zO`gqe`qa4XnD_ zOJb17=}qNzv->E*=s2K99i+;P$#J9_=TpgvCou6X6A&Vu!X9O{T)FtKsPrIT4&Sk> 
z9^gkbM8bk(`%x5>xvbooeSb<1`&sjimTL+IYT*%K;2mv#Te|M{M9QGKfyzR;<^L`k zhsIlC9zI1BcFxiJHPzD!ziz0>Y*(#XRT`FF)Z7vonv40DQygmVV4-V*Dp{zyWd9!; z_T53m=O47yiCA<2`+`uKu@Eg-)Q21Ciru{-s;#=la&~ckn?z9;mjA|VeE#(H_VO3< zKNyP#NeI2*LdP8;M(&C3oSNbiR{#!z5WY`3IM3BZ4z`WHJrW|qdtaQ%n_g)b)= zL^X%G)^zj3%9c>k?Gz;anM}s&>5Ke7Sdqm9h(0ana@P#tr@U{FvQ9oyg*ZoFIKpqy zuB8pXu!tiX=W*=u$jS*iac9DzN&F*nDDPv$xEg!y^Fib02gTZC2i?V?{tGiA$P5`) zZ$PX={$E&$ZxeY;%aF>}(H9Ne|6oxdKM%IR(9G3XuU?J*7sOHn@gd5f7|IL#A2#1I znU8^v=NKm<{)2TPMM4i*2I6{L+*3W?cBaFztS#PGcfh2IL#6ma6y$~cNY3!fC8fX^ zPPUS`Wgj|rAz?iiUCFai6C~OcTL&)-Yju=z7>_4PP=4HlEXif z^6Dr|rpZ_;OvMLg;O;m)hC)-ylgnNV3-V(fj7z({m6_^rA|qr*x(#*dHx$QE5%@3H=4j65!1M=TH$kb1CcQ5%ZdX##II&%V0NcNhG8 zHM2#;>U3+Z!T=V@0I71Vg$-W)P=;FmjEeNetaM`DK3>7!oW~1RbDbKANQ{CpwOOF% z>n~3{)xz17Eewe3oIhjnF3SA?J7585=i@%mJc6m{lIqnBn)(O4hL06pp;=R<443qC ziAUjuR<75$)z~!LoiRQ(Fw9&hjSh{{cAomC&;2GIBT0B-RLmBAfWzBm{BK;9a-XVo znl<=o3s*CbXKOp1)pK0dt-YTsQS2`Um6{`Of5cwIefS~~LSCtcOz`z4nIvfi!iHy( zjgA&yk*qr^ae@;al$S=6oR?-QZc-==$$PEl_+RUWRLX1k{C&T7n90@0;XEfH;nNek zz>gzREZK-uzAe*8-G9&uZMo!DA7~ZR+h=?Q?XUWP<`OTdw1~_PFZ&u(0K?Q!%86pqes7Qrn5m_;G{>c6 zWBp{ieyOc6{!&!B6NPc={NfeIWH-H%vzzItP(k(quo5L5;{!cRxx72o)A|DeKQ?Aj z_Qb{;e9Tk<4D&!_5;RtC6+e$ogS3}ig%j&?TI(^|;gak+v1L0#Ef>pbR{Gz$Z-YcB7!3>s_I|Ek}pDSUXm7d^ZbKX};8r zw@!ZKdb9#CEw7Vya@KNlJccv;5xN@2mw!{`16A|uB>_P!Z_sf`e?qJ8J2|QJ6ABX zjuoiOXgqNGtKaTicx=l713jZmGXx>A$eUFS6P^_YJRqBIm27kLF z@qsTgPC7%c67<}eI<8A~Ml;ga>Dd&oH5Dbwt2?`Je*xWQbRCMsw9gilV9fd^WH3Fv zwtpmI_&GznwDE`AI&Vjg6DM1ie(LE(uTK$ad4D8-3F<(W$CgcVOiS>%#Bh4%x|B5f z3>(Ska?7|H-uHMW`4xH-Rf_Z2aLpcU#&M%loWrV1ce{T*SRe}08-{S^=&%O|o+;u!`Ogr7s@1p3urO<5by!!K$%Fs_7>iCPR}=^1 zhQrHGb_=Tanst;9obI=K{@th#1hqec+bV#(YzxdQKZO%;6jIi1agHOIxI`eXOVUGg z(1hVtet8t2G}S}n&qvToducbwthJT3{4OC*0={W52^7&dp_I8D9p=jRDBPZ)FLSyCqVvdD;T!zru(59M79B(UClQ$1lR#-pyJJw zGqla8--vF^4MEl=k?zi^1k;{oiQoS8ShtR>T=@0j4_{0U?h5USyRF0*fNVM6TGI8U z_{gP>Pu$xtQD&_HAw2Vm)~iep1jT<~*W%^nMvfqSF#oK{r7fMCAi}=Glvhc3p=(;N zZn(K0PoN0RUt>g@VFSm;rrRF2bvjk~71iwB2J|bdqfgx}(ctb0 
zVa<2Q`LhZ-YF_JUo4Y(6`1|tUW1}rK>0NYHxvrBnOOar=C*r@Y@ixhRggIW{i|F~G za{?#CWt1Jas;X}pbEh00M!hC9%auUH5P)MbsgzPAj-6FAhtB#K&lHUjf#~2XMI!v_ z!iF48xgP$!S}Bb$XH8IAHKPhZ!S%%L`Ec=PEy6RG61=)bHk<V8_-U33; z5~`0N1IWhr)K4?){nMtze9siXxHgl8``QFabFqQVa)N9vD&<{%2G8Jr(|^#EhBDD` z1j4YyoGZPPf9J#kS@BJATjVa==PpSfX6+kb!AgOt1oHtHe~9W@ixk&%{0NrR9U2e{ z6hnW2uNrs#1;t!cx3^bk1_?5R5zu>4y8Z7@$DE+_C}-Q*nF-uPPO~apPl8`6EA)jc z9JfYJ_^Ul~`1!wIp5nKp|G_(ggL->863Y*C4&H85)otZ4rE_Lz~@0p zJ$5)@$qM1HNv8e0$OZh@OCx5%d4oF;a9&Pf_U-~&ZF)G(yCZBS`Se5WpTPM7glQqKr)NnYI1nNQGKSc9Sa58n=-L`njte&E}ISd1DX(m(ljdP))Kq8aXFFIS`sT&N}!sQP|iV1*qt9HTKMo(qcVn{zuKVjpZT-MR7A+31qE<-KKKT zAp*=eopUtKIk{X>K&U%oq=#*@M^8H74;&c7Z!sv-=xLd?7f)XV8148sG57Xqii~O` zUDFLwwO?+o2;))7XPi0KZugr9;!pRaEp67-K-ysN3Lbdm9?shYmH7t7I0$Y&{mKh?V z&xW(V0NgAgUekNtk$zuK&^?iGmj1@*Z))&THnc%8dQ`=_eFzYW-{z)?2Ns5AI#zB* zl=#oirH_0a2I;z9UmN3DN!moZ*S2Fn ztZo?M{JBfpMk~9pr(l3ELQT|q^NLS$jS+OFhyko-RSO52LHPB75Z;`v{5$CSLFiZ& zinYrav*#^O>Lk(hwaYVGnEqrdX@pUbEI-;YHG)EH1M%mw2}UBVYF4opOi@;KKLn01 zU867w(Kh`H_iW*8B9=YF9B+A9EVH9B@y8FKoKB^0^;}WbJ|XWQnBnpr9_TceS=((} ziK$n=mKNkp5bf?e`#0|gUANqY_g#5&=R@~ad-?~p`v%gBHMR-dL~U>U@?iB8wzQ3)oe(i#!-^o5Qfhm1K5!<^KvaB3}Cj3fZm^ z9;@h~?(MwCA1~sTv|7dD2MF5n9}GK51RAB2z+KK%OQpnU_Gzh;eT%fCtVmLvZ>E2+ zL*PBokYV(06%df}fytqcq2&b~T`=o%tskcXMOegQa^>hEs39 z8!i&So6bBurs%hp3*+$`+*t^$(D1S4seKJ>Y5qAyz}Vu)@(ali*+F>mn-g?eH9)8j zAI6pj$;ls3FyK7KG_F@2G}WHXAQ6z|Ws@|Qo2wQuOm3d;-qYM6L_5Q7J_6Nnz}-)R zyc&UHVelS#6za4M@6Q@x2U%@*Uo?-Xn-*KvZ5J(G#CFyN6Y{JG)H?~r$ga@zO0HU} zFClA_J9EBo9qXx(sr<(b3AD)r;EU;y(f!PBY%4Zgb1h+e z543XI{7o&-YBawjE=bLn(eH(*>t9x19@9UzLflX(!exOx2?+PLn3=O;g-ZzYp4fni z2n`ach=r>FkZd)P!qJ2|FrVPU>i&BBoAM(%e4^I4-lee(0xfMVeMj!kCSoPpQ)yc7r7&QEgR%_N75Sd^t;*_N@9xVEjL=liE*h(@PVBcYjrCX3ux zYj{c0c9-~qGzoq)MWi`xud_FeXbXZI_B4sQ!`SiYf99yFu24$A5Urt5sR2?hj_E*S z151DDjTEP!Bd@mAEG(#Kb6pN*J$DmGoI1-YU3lYIh3}iay3f@a^@ph;@I}S~S^rL4 z-w=3pafneAX&kp$LebQ02da4p{%GnxjU7hs*k+#Av*<*z)8}Kqm`7v>TcAHg0K)uS zKl2%dgvtQksOl*r18bKf{kDy*se#zziJvO(KbeK(+n-vXgh&ucuvP|R0x5dIn`kpC 
z26bQeuF^_`_7dsgcntQQ2F8ogX^+1yTbAOwf4~%^=APT3RuAplv|X>)diD(ameV7R zmnwAKJ(k8wyA=JZNP3*0RxTmb&G9rEpbZf;RJO#ZVN7TsUk0A3PL+ zt)eosjkMO!s5fLPfEP&YR&thvJS3KL1eQhZB`vbvqOV?Uea75$lBFD z4Eu*^IL!4();Z`|$*GXHFqLYYpgaoq)dg}#)#lvU6R+gYyE5r;y`T{2w)bF@t2{T& z(>ffEkK73T7rv>$Lm^OHU!NJlskBqz->k#5U$zfQ)birUh3r+e4nt%eh&DG>K~p{c z1y+a%7G(oF!|5IG3K_Fpr|1+GjJZdoi7h=e#^AuR$nP8>^*vvh^`gI2AjxX_B{j7= zGlWF7QeNF_93wU_yVXiOT|E?m8MeDW&vO?@hs`k?J$?YX@T1FC;aS`=&CvouP9wj; zNYV^1tGxdT0j^MoN1Fc6BzD~9H-_e<6~rx)`0?}xTR#zxP__fZ|4cw8z8wCRRH_wy z6BOh(efL%qgD7~`s8Dev?MH%qH7_Yw-|x+!?qHwg+L%oEv3?vp&uW+>QzkLLg2xRD zuLl_)tEQq)xrK0CY8q!fLn?2I8?RC&Rs_@zPrp#_G$TDSW6i*5M|6%W~ zW9s^XN8jR5w53IhTXBctR@_=B?(Xhhw75IPt+>0pOR+p;3DsS2O_54O$X^V=ybrUPP*zz@n3RmcU2QTcjm)PwbCVT2zXWe8bd` z<#_voE;{De44kLrsb=tDiLA{~la3EA>+I&q=l37$p!06qNz)j(4?Q=z4b~Cf#Wd&^ z23D6Va|kuJP2~mKE0&48FC%MoJn@$&f1E&dG1ah20&PN}hB7raO4-#0dGBEr&Ab6yI?w}+y!gjPu}t&aOf;>AxZ zQMiROJ3hwu&}U?>P0&j$10E4IrPhy$^*CkKl>t%7j@9Iig`Y*~rIhv9MeqHTF05J< z38jP{9Lpb$@+k6T!MP2TF{1|4IPtN(Iekm#VvNMV1?Dz*Nsoj?^-|BzhGjrY(g$ns~ zNky3$&~Zd?m{0Ki6eHw!Fk|qmGBI?q9w|P#fTxOE>Juw-fahGv0PW-R zIZ`c31BO&wMkW}mA(UFV6N4#!Tr}_Y!Qe*mBQ@1FJgjPxA2Av!l zO_hZBwtVHiml|=-%m^Y7*$Z6B-(EULm;FS0V}`mXgruneS9_w;1Yf zS`BqR`?fjIOkax0Xi-4=qL5fp0N=GVDLSq=$7y7-dU7myr8OJC&fPvFOak#*jK^C{ zm-eE;_W8DIVzj3RP7{Z%VRkxr?Jq>SSg*JOUs!do&MFD%7~Vh1&j2}>FW;GR2G@Uh zh2irTl%EI$A_zF%2DLw5U5mg8%?jYN)e|t9?9|%l!w6UgMm4bbUC}m@FuwgAA$WnH zF9VdBCRHt0N);UQ6@8C=pExwUYan_QknDykSTg3X5Dnd8gpJ1i>aREeyburtK*B1O z;e%vPm75>dU{_h6!Pq;83c0XKhrM3s`JOnsNt}A})`~39F_Ql_?z4Dyuq zv$A@&?&?1w3NgY5pddDv&kjH8?|=kggFn~RvQm+g{(UE*_c;ZBMf~Hi%g4VX836iC z29z}QBQf&d8z<6%OSjD1uK?lxdIX^ApVHXiErQ$S0VRj+#xE@#rbGrE!7rGv_lXb7gc$Ok3$ghP2_w0f-$WZo z5kR6-$gse1Hfb)Mb7-@14o>-p$TL;a4-W`S zlwtk%I`l^w!8OhW2=TuEiXjLHiwNOg^dDI@0V#U^pQ#MbbZxdmZ`b+_ekVORN)|Zn z;m=tBkj3ti zmdb6OEulE9RE6fekJow)=M$2-)~lTpIpXLp$1PV`^ASPO6es(`DsuZrZU*4yn*oY& zrXFC>Y*e7~M#~c%(a<*XP`3nlP<}%p2)8{xNImGlZ5&uXbjv~TE91i!Oz`yb&bdBKo69MBVwuDA)4_;2!v{c(e?~o_jtii2rE#%WNneb=rZ` 
z;e4DqBb&uMX#~$@;-XC-9t8a_Y4#<3OGA&_bH*&>Wj0lJH2xOHn``ufPg#CQF3nXN zqe<6D@|pSwF0TDXV9;8Rtzs(Y>E~HF1Fo zCPBCmSR)=oKH!Idu77uVIFTU$)o?j)ma9JyE$}g(R*hWC>!GGtrK;DtJDJ^zZX}tN z*`(HDfn3YuBJPIQ=|~-$#W=Fs3`8f6FUH`0Z7tyHJfq%pchqDsTdo5>tXq-zhJv>! ztpdIaION)hfI-ynNUpIpOpVz9Snt+YO5gYWLFtsutDCc2hjkITQs>k5o3gn|1#+qt z#r!nlx2d<@+xvBMRR(=&Ew-Lru-@m(^k#LIapHE{LteOSVn^y6xrPn4wsT3+xu06y zy>sL-Y2%-&x$dIL^Cb(2g`r(!wJN{hx7`kY{Uo$jG|D?<@9)XuB9Vt__HFvC#`^Mg z(hW#`Acel{yPbf1fqIMJ*MV+EFEL&XFUKNg$o&%o@R&SVelW;REgf6gOZy4p#%WJI ziN~Y$OQE8Gs(SP_E&}O?M;Kz~`5Z5UxvaQKhch-xwaic!?v}Lgi`RZ9OX=49o)0$B zu|>)v$!u2d`m;dU5Bf9bF18C$prP6kF8y=%HrU!z#OEoi&km`gS-cwXv(>{5o74VB zkU2qR$l$!N?o^S>tlEnIP#636o>9-fO6qK&P>>o1Y$^py?`J zYd%M8)p8Lp&Sf@3Ggohut!~``L)0@Lip?z5I4Q$LyW?6pB$+^mteXB7GjCXn(5ab` zKbUGv_3hwLq)nUn=sS*$6ibbb*u81}%iy=*;rw*T67Z&P_x$~I(a0@P1P>0HS-NcE z`!UmbiCl@KYTv#`8fjCIgwLUumkjG2K9A!UnNm3g;aS+Z3a7GW-ZfF|i+ejca6P-- zHG$WlQj8b-=2zvED%;$FB7x~b0T;nv)1NBp1bK3k<_M?K+o9>&eWU0W-C5y zBDmKm5=k#}ArmhT=BarIcBz4VB<7{Upm=5{j6rv21^Lmz5rt(8?s4N_&Qgn6MDv_o z`S{gmFWlbgZu93fTXETV2sd2d# z0xvl0fRTIUeV3CFoThPiIY2gc6~CC-j%*fwjX5eempweE#w^*dq4LyLI z;&gn!!F8)fW%-A_r&g-4|Abw7Z0$16g!#M+5ptpT#{w>ETnf7tii*|UmpApgha>DO zZ$i-k?VAyLC|qO^jz0}jm$58Gr*b+(KiplV@Y#DSmT4{pQU!MMxLu0vPIRzG^NZ-T z9w=C$_$7gCe;6TjqA^;i%#CX_D7joVUKo;0S(geUV#Ge}TQ{BlI20EPyge6{xm$4F zNNZVjC7CMMd!UNT71~F9HZYJjs-DJ)1d>HqO|dcqkkK3ulhe(*=kxHmpiIU}m6T45 zkOff#m|T(5Iyjmf-IR=y4V>fqo>+ucYk1fRr#@X5Sy@mwIUZmjAqCD{9!f{@^*zyo zoa#H_Sq~COj8JyZHx8LRznpZTH*yjjduZ|hAhH->A`68>wlRjgB#3ptIg@yVyx9m& znb&3x`@MS;B6&+u&Y8YI@1}x$@j847S9gN)MHcU^kauLYTYVW&l?{Ch&&Bq$djHuP92LI?U z(5y8d=ST(p((llcF!y#0QBDHqf)nQ@B4VR z;$>JTCT|`4{(EBGTZ|0RDX)fGy&y@r;$_N8KD^%yE$ygN72?ejiZ#D`3aGqPIWq^J-Kx=9W4T$Uh*C9^xPS|;j&StAdmn;~%MD=C`XV-d{Wa4J#h-qh^)OU_6sPr!a5<4Bb=xn^Sb!g<+Uyrb`4hHQNjmJd1a=w} zwNy$MYL_$&TAP-Vl_9L=GZ^(Y8|{(@+MqNOg36lTAM-f3VOs>G0NE*GFjUU-dR&;CE(~~a>Q;t)@ z5&5gE?+HR@?GJM)Rw9?UuEg48T?enckfT6+AWE}FhjDM%Ai=S>-bbE)a`T>1Mm&bu z$#;RTVedF5q;gl&9wUi_hKWUt$&HZ|fXNSIm)TKHAmVpxDU2>hU 
zBzf0g(ui8_r^26@X!NRg4H7gks(FiOSxQy9hVP&Di{}LLFE4-m1n>Mp9fp_2C8$*w zoo);;VO2LPW;8}~@}g1RDUJW>MG;&?4gfz3xsQO%kQ@XmV2)dI)-mPY>pb0v40Obeug=U+g^W8-)&wH>@RpY0hf|WXl zZ0z4Ui)FDA6EurqHO70YbN zs;+1HKXaAlWZ1X}R{Uq;VZ+fV>t0?P)SI#N2qiT$o zn#c8=;YNqB!vUmWG~xPGaZKM+P2h|yYj7yxcK%azGl(MXgnYzysKKH4rmT_Ipc_Qu zyUeZ;iS`k+6N}yA;P)O$@G$l6I4+;}w!1k3ePL|8VRG5)TA<-*^9hlmgS3%=8=S$N z+qtVVvY*j=uRy0UD7Ajt{fz)_LZbwIR(CM+m6tTLAQG;O!sq+LNsW!^<3|d41%~W> zi@EeGmiNc>Llu4CptyY(-rliU$j+7Yt`*sBL%Jn3Xn z+9ZKM`XQqWX$+)0FXgxzH{zdrpd0x*~h~XgE3XFjhM`$E|{xnKf`-IY|0tE(0 z*0Ry*t%R1xF8FmI1|&2TPa89Wfv?QXLbx?FmT@1p&z92gb5MZ_9qihT3sY;pNFG-h z&pv7Uk!t658n-5-rapX+`_vKM>ekQW&Y)>@NHNp4LI3RRC?$RfGj(xv3Tn?+>>IGlM~qEAf=sc|e3u#WHs;V!i8><&iUgk~TH&+Kn=l<) zje9bq>A?Vuxq-uPhdWFq1y&0Eg@$nN{J8j>+ZREf{3X9#I5P{_DrRVVQf72U{C*H= zq${KfuOaP)aXrP~OaX9HxB-w6S_T#W55)Tl-T4|o2q6Ku|3L`r0J1Z1$18{a56OrJ zc!x5};y)OM9}-&@02%B1U&;PMUCII8(V4Xh`!DY3cLqQPwiX=CZ=myMzw{#mcn1bD z`=9?k|K{&~&H%sW)JFJ^7&L&e<;L}YjE3J5Go=Ai^nxHH|09MDAgsS|7xKTdM*c;c zlp+5W1O7QXn+C`7KeC2CgU?fk_SgRrLm3cO=chH)|D>{hJV!)!eKkmVOuE6^ZG0;V zUbEX~XX@8OLceiPDDnvOK^imnhS0*{G^(iyGTVL83=R{PPl# zHo)I9g?lXGH2^N651;Bh+SeCdS}XzfV|-8UEaFMzanM;MAzuu~mY#Lf&JM{DUT_nS z%lt1uf_Qxr$1kI0r)T)D68IrofsQ?UOTi3)^S|HP3((cgP)x(Wxa(^f@O1Z{fqzrm z#oDz1B|L)lE&pxo7d1pMtCkHD^FJ-UK1q1dlibFgg!`|mt|h+ejwpQ}{wD$cEA%~C zJ3H3(5cA|Vn@*>nlo!gWD~4L z1l8cns*|Cav;0RzIkXTc3}R}n!$2(i?+K$z3=Ca#-XkDH`1cqzc^+C8H%z}@!C-CqoKG^|@wl>-57VDY{sLI!san0cYUKkQioqoFw#oAwe-+Dn zZI&$>5zp-c?r%D7#jQQUVKpUXHJd>Kj$%OG2bCp!&KAjipF%00Cl*C4tiRSBLM<;wO|3Vs*|#A>e)YQ^^1DS`xAAbbq>tLa#48 zjPnjKHz0sZwcoy0Ey0XWL44rS{C5f|Z@cPrft z9!nQ6hmy9nD1}23;vI?-`a3ziB$~L=5#U)#EthX$4__XVuZw!c@Ak}R3W@5^w+-pv z$HGMOY2i13qbGA@M!*o{E1`+xFyQUDp#>I{;;w|%?8o@;9CrQF7Aep($C4l3uX||` z#yqOOuQ7{{*)KE3ncedAhJQ3!G*9C33M*cz*C_l#MK&_VEk4Sd?MOJc>;b30sQ`h8{A$6~p2~^c9G#;beXulVNhqS)-2k6PdtA4O9GJ7b{8AIclLeey9 z{y-r7@#c&5pXus<`IPIHSS~d(SuTEvfrVsfnp?z1-`Fe#Np=Vd=ju*#G8tZL+f5#G zXctcvFDO5%T}L!OsTwUg%0()xHQ0x%JlU_TLR4H1p%M(jJrKk&Z5Ae0^LbvxPgyuC 
zzRrp9dX%{GnCfOzDDwz@rQ_ykkzmE=qN3^IRr!WeaqxioW}AMq?sK{+hYT}tj$&4l zp+{H@4$LNkw^FMu?OeH|%J}VY?Tbvu=|ijjvY0LLY*+<@<7KFck}=T4(IB(IY{uQl z?)lzbODwHCOOUeL=>-9qR_h`}1#E;;Rc(vGW-_gGo57rs4OzGS1EA97ZMvAp+ zOyf=+mu!bGz5Q&AjtTZtAx zClw)KF%VjZe$x8oFjNXlkEPp!leR2~fBcZNF{JhB@@S4yFPA!bsKJSOSjdk|)Sc&O9iS#+K1QcVttwMwZiAGaI`E^4ri{h5HW2!$nixl0;_2z9&lc96$4=>Ryew-B!NagKZ{D zusilc5Bfx-FqHfw`^u9_+76=>XN?tQNirzn_B$uC9&0=kDwvYyOo6;^z%?+MR;5pkWZX@Up8MnP04W+@B2HK@ZKOI8c)J8GHL*!aC& z_Rl~wEO|4aO;HQg#LGA{WV1%(Z#7 z-3VbC1J0Skn~gVr1H)XXJFp}edR!x^+$zG}y}Im`>1_6=Ro#h#A%2xS{2ITOrkJq} z=U~1*)P(OGwu2T|ZQ6b~ib^HZpXeGcdDj`Mu<^JfwI`-ZSGwU@i@%kdx$Ihq6kDt` zc96%#;HDcr9t1<(T_u;j&j!83prabX{MI_tY_e{Howix#c`a$#yjKiFDmVxlt7^;_ zYDM8--UT(zHeDSg5cG}4_8m9P0<18QZsKX*#@GGkVJgEtnYDn{_tpUkp4GhgQ)#{H zxD2a}F~S{cvL#3m=4`;jsru!70^%RjpyOFf`;++*+}FQ+JB)1I5l)SpoG5}J;LA6M z5{7^SyQzVoExd%+Q9}^KIwA%Q!#;u6z0K};rtdD5#Y9d}wZ+0rU<32$2NGjGI#OBf zo@kZ4yDInGUzA_assh?#X9siba;5wWe1J z(x{1BP1oShfvA{bIXo$gJX&NN1u54i)045ZT4H7+!*gSIE0@${97dzrC}6n%s2qUD z?fkJvb(~K9k0k{!L?|!4^lIx}XK8Q7i_k$D&1UClmK`epuf7?+M5PwsjRD&}Jf}nm zSTsLi!(p{CSG*@1@S_%dJzHr~(oXU=tmABb{-<8YB;UbetN6<|G;8FHD*LmCjqcv)hjiF&Ydxswf#62cs4@jqJp(Mz_c~S() zO?Ix1ssQ-w|Ze{7xjLlYw6C1GPeJ^jx*4bQY*DkIVKt(p(qBUz>o;q$n)yuwsRx%lo^u+X-#GCfZ^BLbV)35m}A+4A? 
z&3d)`mD$qeLS4g2*HpGHR;i*@)x%pOEF1Q#^q?ctB+k{lj7_q$yp+w5(ncXXP%H8r6KUO`MC(E3GyJ2`2e`w7Dj?Z$Rj%FykW0B(pGl{A2O#NffhBaEsxC zW5a1$oxyni`pE5?Fyi3gwA#imqEzGS`VozCk!KKCvS*O1e<}#}m#JBuk2b({a1EdK zR!E%J3#Vf@Q(bVL7~g!kP{SpV_n7)VFybA;_%8&-}$Ll==N+R zXn8VDdYR8xQzCpSMQb>z(7T(GQWPO6V=O#pp-o^+8vcYFN)5Cf5cvPj6c$v6=V<3EypHCrGtgHWsqt zCibkRw@@WM>XnRU!^$pfA3L&ZHv6>%X*i4d6>v8$I+OyCfBeALwPmeWRR46bZTJn5 zSv-B@Zj5MhkS#9Y=6$#8a3I$(yoZB&q3L|fnZ)6gYwx}Rb;fZEQnBq5@*VO0hUXCU zIcuNoAkq#0PK;bugEZa!rb&J75t&jU%X-|TZK2LrDyjA(C(mgG|L{Xzwp7{v2mUk8 zsA6sJW=Nf@AwL)J^p#hVdtZ1uNy5r)<7iUtB&naZZ>HJB`3z5v-BqSx($gQ$iIgmo zYqzkt2G$p*H-f+l7Q5PVY5u!Ik$rF_lX3HW@7mSefHEeJ~a)wL-8s9wF1-q%$c)G8{cbua0k0 z!RzE7o`?U5;CE_XpcACZa@CgN3vQk35Pjj`H;iT3!LhJ0TLI{{QI|vE3*b$9?X?c# zRJXa8)5I1i=3N_&Cxv82n^ZGmbmFszi7Cj>WG?{3Da`a6>wH(8xBo z9jO0nWrg&nA*jMwU2F*YmrJZYld(mmSBSr}Ql71b$kb`hz@DDiX<^dQ)eHk&v zKZ$)nv=-oQqcvmy0*631fJ)eC3DXe%gBu)ytikwHv-IuXhcG-hj&}7-Qd)zR`Tr(D z-Um_u2H^tnj{QSR2#){>dOY4%JfFSr=QEcCi0N4Bkp>(z{};==Pm$c8UbR?HC(zJ{nNP3GekNn3WWJ5`>_9+!&TZH zp8ZQ*{*Q{<7#*b;vqP2EejFaiWVg0xL{Gf#>$K-1NByHtG_B{?E#hTrW^aygw$((5ZYKN%7gNEPu>{^7dkCAo7R!`)gcLDy<`nrGD%<{i9O{`rBXts4t z|394X7d?RHyj+;r|L%xq7zm)bvJXkvf9n`Hy8~cUBU)+0e}d1yVU{}pVYjGd4FBNb z-yQV$Obin!?EXUxwVsJ#a7@yFh+!L`i~G%i#ebOC{}t-|AMw6OM2!j^!oGx zL%7m@S57OlSIC40t35l-^6!WzEF z^A_FDMz$uU$AQeMj!Tf1*OCU_b*oMnY{S*E+h*ku9WzK=TZsJjwV8%tmi$Pqu*Gb7 z=8eW!A|uHH)32Zyens*~GHx?GF2~sY(xy@P8btc$Pu4NVfHPHXIxSIawSo`2fct#Y ziAbO0f_=6%LVZ01XyBJJfya5&s~UhWq86X+OBOp+Iv%Q;0In4G-9c5Q`;9Hh`16}V z!6HQhDH($#5!}Sf+ein@y4~{e+2d--`UV2*;zCe`S4On0#lN~l}$ zEkT#qV*rT92>hMdD7-3OOPtUE8sGC;XHq2BoIZ~nAokuLA(J$V1!6r;doU;Jth6Rm z5m4t2=Bjdm3@|XDh5_IQ59GjOF!)BLQeQkd{uLzh5@`qqEtAg(zWH>ag3tQ#O4~{) zc$0aC)4RR(dM(6YGEW+OHpVx!H(g`^nBw65vbMo+JTBPnkktF9i`kUy0?j-VO{ARi{RYU9t(u?#`^aTMG84mg70(#E~Q&IS>##Ll0+nTL9z zE6(d-BEk#2qs;9QzK-`04G5mmXgea#@&QJXjV_+5tE0+-&HKjU3)qtLpE1&Mfrl=k zmZ{cIx$T$KG1V4~J4)Fa3m5rqb9?YA6)Ugul@?luj+vdO-|rQbQ6aG7I54!gWG1n= zQVfAuaI?aw3WO>24>*nMcZa~|-nwNn0Ffw!!depF;*1^*6X}L3= 
z+2L7S^B~jvazFI5xmVE@Nqopd$imNT(Y^)$#zY(Mn@!lWw&7D8Hgjd?hYJvJaL6G> zZ#Z5Gr~p-vAO0Kz^1kv|T&Hq9B#Qo8QJNj_S3O6SP~ubW{uIWQVc6bXxJKOQ`BE z(Z)fbHyQ;$CVBJ=#C;^zMY*~$nKXn(j6C5?a|aLH*P9GNZbW*13__>5lR8Q9!7xDuKdziH{ut&@SJOOg7+N8bN zJijm6D99*@fR@>C7=FtXO{!?~is=f<>Taq)-q!@h_XZI*Z~#Nid{w!pw~tML4GYtn zxkUK`kX_f3-W>9oSfz?TQd@PQovR#J-}zc>HwPJj<@|}LlQr)Y1caD6J=h5u5?RxI3)MVps%KcLAe@V@QPXudS9^ZjdcmU3zoc%N)4XzkoY?Hu zCkuamRO0a%{gGkyM}rn%3B9^;NDbE^HIu5`VvkP)(<_pDzyD0#y`hXDhE4jFUbg$b z$el0sE5{?SWLHtbgHi$A`0FWf88;{H02^-XJqtP_$O;(VL}ctzJMUzV)1zyYA@sl6 zN;w+@#k|4^{_>ph-Ub_{$tC9`MTlE%@_2vazehwPb-T&eZyhV5KjOaZ?jvgE3rllo z<<*Zvno#lHjJc?ny*7+Ch6X49hsGwB>Ra-Kmdp7N4a{(1X$5W!R`=ZxDP?(j3N!Tr zgd^@a^dmLOsW_5pWWDbnyxT`;61yU@cO_wH2{uO*i1^;6(^pRvWYm1jPHc6HDpu*% zW3baz<<5AUZ+Rw5`!1cyAowYe$u66q>`rC|FMIZ}F;VqR75a`KiA6r%FqYe`>Wp0P zF@(v?;Et&l{5cAw(81<0Wrfya(gWw1BamT#0NB%GQbIo~y<#Y2N2PE-zZZ?&vK!;s zex%Q7Y)?RD)Kz?s88L^Mmq4@@d6!a4Qbi9w_-;3PYLCv=;SwCkiwWY^{z$@{mAL9_ z6w`1OVo!D%vb3OY2)h3ee$BROkQY{<;>N3j+F zSlhIFjWc&yO({amL?Z9c_0E1=U=CP7_8;wm9U!f4fLv2HMbOXee3}U|2K(o58Cf7C zQHdCp$nxaUy&Xj_lVa@%+k$>GdqMXl+B=^WBdTVt>zz1%{){g>Pf?=yNjzu6%*$RA zH6MvSbc@+8UeI3USf*W_rx`Y0wUgLJFv>VA4>upn8rzk4jca zx+t0nix%AE)E#AgH@`2|q^FCMH!M}}LA*`KaxdI)F{4%mm}_)L7_~X~49Mc1q1>jC zPUNicYE)>r6YeapC*yAlVqf4n&g#rn8VoT_KDyBHeedho45ltUF`g_WfdAk}@{#UL ziR>dx3J34qS_r+ndJsNz$OqUDMKMAQFE{+YhA`^&b_+?@Rir8ZSgHsGRg?^BVGTql ziA-kQ7(uEG%8_e*|7I*O(@sKWw&@1;^r41;xfd&Zoy9<8_D6xsW*LbyJ)y0yPujUY z`KwGbV=nwCdERabywixxa(4LFD~9NroU65_#_df5#i&9O5wiGk));FmkkW||4Vi|A z`Ka_d8ThDVXM+QFfaLQ`F*@VHy}%@X9>~|U_yg`g8~LXQKNxEeS{Y*n7W)#;#5G=( zscF%G(iwFG1Kjz)1kOtih!@vQV_4+D*i}<)Z43;_HY3fY}NAsb)UZ?dIOI`JrT!0Ohgxj7AuY&^hw+TQgO@q7*sj{ zWK$f~QOJQH-ELWOB|^mQEmZ<{J~}6)AY6D;e;y4LP1E%VT{J$(h=>r{70P>tty5rp46p=U9bKhF^()Gf?cwH;tBu@s zT2DF=Jqm;ShDArmPJ4p3U{;kF@+7Jm%o$7ad{m&Altjm|bYJ=x3k;9H)Ah{O5XEzd zEn91ENG!}!Nd4TUboE%WCpm_yFD2#|vK4NRo5T4dx^N0kVN(^4#D+-~apG&awEq`0 zyY11D;iP5s^Ip3|1@SrWudK;Hh?)^)^E3>^-#z}Qee_YSx4w@r?G!cl9#P2hSl}`9=JrFk$eZDpIrKbtB}!iZauVW 
z<469k-e72BY~^6e>Gv#!)TVmM0uW$dMU-@wTTV3Woy4!$LsF|}a2>hnkyPZ`?wf=Z zi4FTQJ0rI~uz`%i3&KmrH4Lb_6Xd6KG1(mT+cCTjI-^^n5CdL}l9`N|W05?FUyYr) z6tyzw)$LaOF&jll2_E!6#5|v|%ue`5bclwE=LmItGp989~5T{SFVcYmv zG81E5`c8y4R7yz_J6-Ud>FWs+8GzE-@*rx(DGv(3@Je-z8#*Z!*idNE{_=OkCy?>1KK*`jB;ofJZAS;%+y{F~kR={jc9Ks{;fr`EELm1Djo*`ene>dm zJX_Kmuwc|#;E%z|L|Fv}uastujSb>d@1%v7!a=djhsv6x@gr2<>5n7t!m8g@kC1oV zvzXw7M&6zlPQATEMxkeP?)3(p}dr3<| zjQd!W@UFM$VQJr(mP4Uz>_I{u>5)<-axi029@zQ6(v4_kl@3kF zxSH`dof?mp&{Fe848!u+)DGGfg4 z**hm7NY07I#BF#rj#sTBt={6CrswQqzGcEjG_$l5;juPVh4ja>c~n1cQJkm|#ujNB zf@}d7{KD@0Rv#1gvu%P!EH082|22n|0ZzR)3tCqX+TQ~=*pRdNm_HKb%fT@Z;x?di zqvrKoxL#`#d1Zi$bB*_^&~|vBU3H0^QfQQ#W)rH-AXRU^ROr9Xi%@)Izce_)fvHwa zoK(!8)k|F!i)bi2=x*U$__?D=gDD)~T>Vi5k_xXFS7YJGh9%T^mO(|g)SUd6p-Z*8 znOTfsRhGx-PYQ;<2r5(2A%EqXuXYaAv7)px!^B*NHN7BMrZ97v zvHfS4s>T0nzp7POzAFjzZP<%eQ|Mi^%s}kw6ghIUrRIb^3`oF!M{6 zJZ)>Y^bsB&f{39XEfU*45;-xM`p?Zk>-TlQwevRC<-O-U2e|O(@6c3m^`Y{%UP1tk z{(jCAE%e8kN452m<$tgFB0<1lAyq=`=Wa#ghUVHSGZL0Qj>|kLqehdtXO(lml>rxX z^3OcIr|BH6R5ue;rVE+I$cm&0H}Kti8RAq07JgHj8_r5eV-ZW|0aI&}=@zf4M+WIv zp)E!Fmn00(Z?qX0?vph~=A)%FX%MNZl=_JUF!VkGuG-K(Fs%E;2@ZN01e19(}GgcwIrIK7&S}rvKKqoM7 zCC%F*YhjnW!@4RUS!p1lTepBJo>jMi`F1Ki(%yN#+60+C^9X<}w=VHpYxwXUs@29( z^q7KVHn*OxN8x}jqYdaNLKw`@5zIYJM zem*#~BAk39)_;P)`ImQSPsf6^spOsV=gj-Q+pO~-BleeO1wLkljG^1g5I%O*l79`r zQ~`d&@zGz(Km3DGU)SjN1S5^~0T|$cI|eKLXE?*EK;?Xtl9mUnWI~-!bqjcqy3|D= zrVzqgih$>QYSrQ zfZ)3ae&{mqF?R=BOW<~s88GOKysg#jCXTz9J9STKjvl+wWv5!%4Tk}!SN7=<$aEV( z;W_^VVx#(O34wD6jBG+^MQTW%Y`~{n747otN)8V5tGRjK^>He^+>r>KCy(uDS}Jje zLOMB?R=u8q!ZddK+GRU+J3-NeEO}bR0cFh**os_<Gn2ji%h zq?C?ORRz=-IPXsT2vQWb$=d)AnZ4`#oYXiH6I)5OWGJ$yF^$s~OZlqX1rDbr2T;Sn z^|IUX2m%Ty0+)TXLSJ3Zt4`5Ds8L1ojU83Dp#y@MU{ zpoQ;#(xC}ACg|n<^3G06gXGTy^?0>|Q0aI!!rp{n3{-x|gx4=H?H#=dARl)h)}OaK&4 zv$MwBIi5;oRSp%v+c2Cc>=hTp{~$xsQ8G1dLmt< zAd?%wxL$-FY#FqjH_5-r1ZZEgd*SM%$JNqyH=C&}F2=zLLYfvs7t;5aeDKOA_?5UG zHxp+LyxFe;eq@&T{x~e}LUEvz$Nn>~c3wmJrWNZ9CbOC;I?fp+a3FLfr3c|hdzVSa z0*>$sUd2yGokpKow4Q*xeP9Lz_PkWnqwJZG)M_8sU$l;!M 
zVglI(q;=}kjN&~5l>Tu|8xh-<39cU)4W-kK_vdXV9F@Z)gn?D-uk?F zhxxGb`n4w6=?9NO>d0fP;ZIk|mhoK&C^L!nchuY|E1q{Ha9#|(CM>32`%^&C&+j$+ z1@!P2rXM}<^5ua|f&@~`UsLJ7yKE)h*i3$eo+RG9b7weIWH>WiRd)H-IA{<$>-_ND zOjN3B6ZD)DaHx(EYJBTpXldr0`U;<3PydGwwq4d06Q+j!=gi^r-N}b~lWLzNmNPq% z+1n`-93j3-09tNrUNZpER_)~P+9+F|i||SeBT@`8_L<*JhQJ4V$0UC6n{O0i83+>T<@Rv%Zl5%|+K05GVt^%g zRJ-kF&Zm#KA6%Td$NF!_8ck#g4CN;u;d)=}4~{uc3yX;O-lV}y;sgMBFRg`QC3+&| zUDHZhcn6{p5k%{Htx+DOtR>ZTBUu7>-`7=1+C0MdpZ82aW7QGu8K?~HQ@-M(

8K z{%lAcz;}}?zG2Zx6#4m5hpk)GmVQu2qlZ(!Spp|5S@A*y{kB#KS?^=Ie zrrHitmZpYWb*ek>73B9WbzVMmtpKC~_K(-nL=N+M=r*goiTIc_z&BFT%dvc1{osSZ zF?e^}cKj%594jZ!;0&B`uwLrV0f0^oNzve!yxBEEXWz)t(altV^2NnIikD|p^HTS# zV)7FRMCrJK-MVpDF2h(6%B6+F^RA)pcE;V$G`bJ- ze>Q1;Gw3}=@0p9@ir0zcj43+2zO&r}&^~b@GP<6)9JV?6P3@$?2LNcC3u9m2X&A$C z__{9^$4}?-sXJikwjpuy~TZ$#1dl>9-5x zP8IF_6oe+DQX6@NB0G@1se2?aXUZCbch7%;+(&bviQR$qC<3RYR={HM@!ywVCAtheJY- zav$fgQ*aVm{*zm8#{t<7r=Kh+aI`NssJ~c$DR;|c$^G-UpT2Nw!akOq4xQe5frW{O z+ja(9l}`TFJVo{dn{>{DS!PX+3KcWVuTR;~#PDQ}%chkHQhT4ztG02H&<)j^+jGbF z_0!J|ub$0Ro5A*|Q*5(nATUTw_GNf`?wP}N-uk@gdA;wGko=H+?Sj`v9|6mCA2utB zvPx}MpI@`-LHQAvGe-VPOb)1chE>+wQ3#sxSSmB# z#|h4~C2w_p3C!Ob;-TO4bDONCEAy=2!QKiTHVR8RR_<$X=rUKifzZm+2nOL?dkduFQ*x65KD zCrft5UM9vvXHIl6Fn8EG^t3?D|+~aeVT{%ncYMQg~85xlr zUzh%#uJL4UStS^iKYQZoJ^#0vWhq2mHL;0{2X-?LZP77w zn#yt7?NXje#+u_r7g?j(FDt7byy8)BkQ-%iNu>M7w6drJZQK9POrNJYXZ5X2bEzvH z)2{o*OM5QP2=Jad>G3Q(@t5hw*Q>Xd{yw-v>LLF!#j~%M{hubiepBVgosmm2E_l_H z1c2(B3z{q!O1`wrSf*(*`$n?**%B!ojfG)1D|n+ncXKa^pYX??Db0y**+b48Y0eoF zA01i5B`x~$d*O*$Gb5URiv5yj5$s%Pt>8Ro^MmfT?+puPl&yML`I)o$(V=bjPPfEw zF$lJJEL~}RpiM+|+12cbQkCpwGrv}yvEP_@Gfn;0OZK;m_OR-Ngk?Ia-4nYUR6Rdv z6Px3b{Yx4RdyAfawbFnziQXz+Fiu&!H*a-`zCyyqt@Cc5{qvxv`f ze0R7!ZJVZQ_LIHc&rjrP0Eak%m2YFC@(25n#Qi^1fxIaK3cn_LBrx%Fc)i>LY=<3Q zJo)3oH_J+P%iqn}4e8KvH0lUPUJeF#03cni#;p>Kj&pY<>mCdB6$YEh2x`YTxC;nG zzUJsz1@65;)!%h=NLg#WV^tVP1++FRVPTxOweP?R&;SIi6L&!t*kRk%ygEyC)W87> c!^Z!t^LfPon(lqJj{yigUHx3vIVCg!07-0nGynhq literal 0 HcmV?d00001 diff --git a/recipes_source/distributed_device_mesh.rst b/recipes_source/distributed_device_mesh.rst new file mode 100644 index 00000000000..798ef96f3d4 --- /dev/null +++ b/recipes_source/distributed_device_mesh.rst @@ -0,0 +1,135 @@ +Getting Started with DeviceMesh +===================================================== + +**Author**: `Iris Zhang `__, `Wanchao Liang `__ + +.. note:: + |edit| View and edit this tutorial in `github `__. 
+ +Prerequisites: + +- `Distributed Communication Package - torch.distributed `__ + +.. Setting up nccl communicators for distributed communication during distributed training could be challenging. For workloads where users need to compose different parallelisms, +.. users would need to manually set up and manage nccl communicators(for example, :class:`ProcessGroup`) for each parallelism solutions. This is fairly complicated and error-proned. +.. :class:`DeviceMesh` can help make this process much easier. + +What is DeviceMesh +------------------ +.. :class:`DeviceMesh` is a higher level abstraction that manages :class:`ProcessGroup`. It allows users to easily +.. create inter-node and intra-node process groups without worrying about how to set up ranks correctly for different sub process groups. +.. Users can also easily manage the underlying process_groups/devices for multi-dimensional parallelism via :class:`DeviceMesh`. + +.. figure:: /_static/img/distributed/device_mesh.png + :width: 100% + :align: center + :alt: PyTorch DeviceMesh + +Why DeviceMesh is Useful +------------------------ + +.. Below is the code snippet for a 2D setup without :class:`DeviceMesh`. First, we need to manually calculate shard group and replicate group. Then, we need to assign the correct shard and +.. replicate group to each rank. + +.. code-block:: python +import os + +import torch +import torch.distributed as dist + +# Understand world topology +rank = int(os.environ["RANK"]) +world_size = int(os.environ["WORLD_SIZE"]) +print(f"Running example on {rank=} in a world with {world_size=}") + +# Create process groups to manage 2-D like parallel pattern +dist.init_process_group("nccl") + +# Create shard groups (e.g. 
(0, 1, 2, 3), (4, 5, 6, 7)) +# and assign the correct shard group to each rank +num_node_devices = torch.cuda.device_count() +shard_rank_lists = list(range(0, num_node_devices // 2)), list(range(num_node_devices // 2, num_node_devices)) +shard_groups = ( + dist.new_group(shard_rank_lists[0]), + dist.new_group(shard_rank_lists[1]), +) +current_shard_group = ( + shard_groups[0] if rank in shard_rank_lists[0] else shard_groups[1] +) + +# Create replicate groups (e.g. (0, 4), (1, 5), (2, 6), (3, 7)) +# and assign the correct replicate group to each rank +current_replicate_group = None +shard_factor = len(shard_rank_lists[0]) +for i in range(num_node_devices // 2): + replicate_group_ranks = list(range(i, num_node_devices, shard_factor)) + replicate_group = dist.new_group(replicate_group_ranks) + if rank in replicate_group_ranks: + current_replicate_group = replicate_group + +.. To run the above code snippet, we can leverage PyTorch Elastic. Let's create a file named ``2d_setup.py``. +.. Then, run the following `torch elastic/torchrun `__ command. + +.. code-block:: python +torchrun --nnodes=1 --nproc_per_node=8 --rdzv_id=100 --rdzv_endpoint=localhost:29400 2d_setup.py + + +.. With the help of :func:`init_device_mesh`, we can accomplish the above 2D setup in just 2 lines. + + +.. code-block:: python +from torch.distributed.device_mesh import init_device_mesh +device_mesh = init_device_mesh("cuda", (2, 4)) + +.. Let's create a file named ``2d_setup_with_device_mesh.py``. +.. Then, run the following `torch elastic/torchrun `__ command. + +.. code-block:: python +torchrun --nnodes=1 --nproc_per_node=8 --rdzv_id=100 --rdzv_endpoint=localhost:29400 2d_setup_with_device_mesh.py + + +How to use DeviceMesh with HSDP +------------------------------- + +Hybrid Sharding(HSDP) +Let's see an example of how DeviceMesh can assist with applying Hybrid Sharding strategy to your model. + +.. 
code-block:: python +import torch +import torch.nn as nn + +from torch.distributed.device_mesh import init_device_mesh +from torch.distributed.fsdp import FullyShardedDataParallel as FSDP, ShardingStrategy + + +class ToyModel(nn.Module): + def __init__(self): + super(ToyModel, self).__init__() + self.net1 = nn.Linear(10, 10) + self.relu = nn.ReLU() + self.net2 = nn.Linear(10, 5) + + def forward(self, x): + return self.net2(self.relu(self.net1(x))) + + +# HSDP: MeshShape(2, 4) +mesh_2d = init_device_mesh("cuda", (2, 4)) +model = FSDP( + ToyModel(), device_mesh=mesh_2d, sharding_strategy=ShardingStrategy.HYBRID_SHARD +) + +.. Let's create a file named ``hsdp.py``. +.. Then, run the following `torch elastic/torchrun `__ command. + +.. code-block:: python +torchrun --nnodes=1 --nproc_per_node=8 --rdzv_id=100 --rdzv_endpoint=localhost:29400 hsdp.py + +Conclusion +---------- +.. In conclusion, we have learned about :class:`DeviceMesh` and :func:`init_device_mesh`, as well as how +.. they can be used to describe the layout of devices across the cluster. + +.. For more information, please see the following: + +- `2D parallel combining Tensor/Sequance Parallel with FSDP `__ diff --git a/recipes_source/recipes_index.rst b/recipes_source/recipes_index.rst index 5dc55874459..c6912efedfc 100644 --- a/recipes_source/recipes_index.rst +++ b/recipes_source/recipes_index.rst @@ -324,6 +324,13 @@ Recipes are bite-sized, actionable examples of how to use specific PyTorch featu :link: ../recipes/DCP_tutorial.html :tags: Distributed-Training + .. customcarditem:: + :header: Getting Started with DeviceMesh + :card_description: Learn how to use DeviceMesh + :image: ../_static/img/thumbnails/cropped/profiler.png + :link: ../recipes/distributed_device_mesh.html + :tags: Distributed-Training + .. TorchServe .. 
customcarditem:: From f036495e7d9dc9c51c67226fe60831ce8ef7d691 Mon Sep 17 00:00:00 2001 From: Iris Z <31293777+wz337@users.noreply.github.com> Date: Tue, 19 Dec 2023 13:30:36 -0800 Subject: [PATCH 02/10] Update recipes_source/distributed_device_mesh.rst Co-authored-by: Svetlana Karslioglu --- recipes_source/distributed_device_mesh.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes_source/distributed_device_mesh.rst b/recipes_source/distributed_device_mesh.rst index 798ef96f3d4..700c7daeaed 100644 --- a/recipes_source/distributed_device_mesh.rst +++ b/recipes_source/distributed_device_mesh.rst @@ -10,7 +10,7 @@ Prerequisites: - `Distributed Communication Package - torch.distributed `__ -.. Setting up nccl communicators for distributed communication during distributed training could be challenging. For workloads where users need to compose different parallelisms, +.. Setting up the NVIDIA Collective Communication Library (NCCL) communicators for distributed communication during distributed training can pose a significant challenge. For workloads where users need to compose different parallelisms, .. users would need to manually set up and manage nccl communicators(for example, :class:`ProcessGroup`) for each parallelism solutions. This is fairly complicated and error-proned. .. :class:`DeviceMesh` can help make this process much easier. 
From 572fdd93a1360241aeb47283d5159703b8ef3629 Mon Sep 17 00:00:00 2001 From: Iris Z <31293777+wz337@users.noreply.github.com> Date: Tue, 19 Dec 2023 13:32:02 -0800 Subject: [PATCH 03/10] Apply suggestions from code review Co-authored-by: Svetlana Karslioglu --- recipes_source/distributed_device_mesh.rst | 160 +++++++++++---------- 1 file changed, 83 insertions(+), 77 deletions(-) diff --git a/recipes_source/distributed_device_mesh.rst b/recipes_source/distributed_device_mesh.rst index 700c7daeaed..8512f7def46 100644 --- a/recipes_source/distributed_device_mesh.rst +++ b/recipes_source/distributed_device_mesh.rst @@ -9,16 +9,19 @@ Getting Started with DeviceMesh Prerequisites: - `Distributed Communication Package - torch.distributed `__ +- Python +- PyTorch 2.2 -.. Setting up the NVIDIA Collective Communication Library (NCCL) communicators for distributed communication during distributed training can pose a significant challenge. For workloads where users need to compose different parallelisms, -.. users would need to manually set up and manage nccl communicators(for example, :class:`ProcessGroup`) for each parallelism solutions. This is fairly complicated and error-proned. -.. :class:`DeviceMesh` can help make this process much easier. + +Setting up the NVIDIA Collective Communication Library (NCCL) communicators for distributed communication during distributed training can pose a significant challenge. For workloads where users need to compose different parallelisms, +users would need to manually set up and manage NCCL communicators (for example, :class:`ProcessGroup`) for each parallelism solutions. This process could be complicated and susceptible to errors. +:class:`DeviceMesh` can simplify this process, making it more manageable and less prone to errors. What is DeviceMesh ------------------ -.. :class:`DeviceMesh` is a higher level abstraction that manages :class:`ProcessGroup`. It allows users to easily -.. 
create inter-node and intra-node process groups without worrying about how to set up ranks correctly for different sub process groups. -.. Users can also easily manage the underlying process_groups/devices for multi-dimensional parallelism via :class:`DeviceMesh`. +:class:`DeviceMesh` is a higher level abstraction that manages :class:`ProcessGroup`. It allows users to effortlessly +create inter-node and intra-node process groups without worrying about how to set up ranks correctly for different sub process groups. +Users can also easily manage the underlying process_groups/devices for multi-dimensional parallelism via :class:`DeviceMesh`. .. figure:: /_static/img/distributed/device_mesh.png :width: 100% @@ -28,108 +31,111 @@ What is DeviceMesh Why DeviceMesh is Useful ------------------------ -.. Below is the code snippet for a 2D setup without :class:`DeviceMesh`. First, we need to manually calculate shard group and replicate group. Then, we need to assign the correct shard and -.. replicate group to each rank. +The following code snippet illustrates a 2D setup without :class:`DeviceMesh`. First, we need to manually calculate the shard group and replicate group. Then, we need to assign the correct shard and +replicate group to each rank. .. code-block:: python -import os - -import torch -import torch.distributed as dist - -# Understand world topology -rank = int(os.environ["RANK"]) -world_size = int(os.environ["WORLD_SIZE"]) -print(f"Running example on {rank=} in a world with {world_size=}") - -# Create process groups to manage 2-D like parallel pattern -dist.init_process_group("nccl") - -# Create shard groups (e.g. 
(0, 1, 2, 3), (4, 5, 6, 7)) -# and assign the correct shard group to each rank -num_node_devices = torch.cuda.device_count() -shard_rank_lists = list(range(0, num_node_devices // 2)), list(range(num_node_devices // 2, num_node_devices)) -shard_groups = ( - dist.new_group(shard_rank_lists[0]), - dist.new_group(shard_rank_lists[1]), -) -current_shard_group = ( - shard_groups[0] if rank in shard_rank_lists[0] else shard_groups[1] -) - -# Create replicate groups (e.g. (0, 4), (1, 5), (2, 6), (3, 7)) -# and assign the correct replicate group to each rank -current_replicate_group = None -shard_factor = len(shard_rank_lists[0]) -for i in range(num_node_devices // 2): - replicate_group_ranks = list(range(i, num_node_devices, shard_factor)) - replicate_group = dist.new_group(replicate_group_ranks) - if rank in replicate_group_ranks: - current_replicate_group = replicate_group - -.. To run the above code snippet, we can leverage PyTorch Elastic. Let's create a file named ``2d_setup.py``. -.. Then, run the following `torch elastic/torchrun `__ command. + import os + + import torch + import torch.distributed as dist + + # Understand world topology + rank = int(os.environ["RANK"]) + world_size = int(os.environ["WORLD_SIZE"]) + print(f"Running example on {rank=} in a world with {world_size=}") + + # Create process groups to manage 2-D like parallel pattern + dist.init_process_group("nccl") + + # Create shard groups (e.g. 
(0, 1, 2, 3), (4, 5, 6, 7)) + # and assign the correct shard group to each rank + num_node_devices = torch.cuda.device_count() + shard_rank_lists = list(range(0, num_node_devices // 2)), list(range(num_node_devices // 2, num_node_devices)) + shard_groups = ( + dist.new_group(shard_rank_lists[0]), + dist.new_group(shard_rank_lists[1]), + ) + current_shard_group = ( + shard_groups[0] if rank in shard_rank_lists[0] else shard_groups[1] + ) + + # Create replicate groups (for example, (0, 4), (1, 5), (2, 6), (3, 7)) + # and assign the correct replicate group to each rank + current_replicate_group = None + shard_factor = len(shard_rank_lists[0]) + for i in range(num_node_devices // 2): + replicate_group_ranks = list(range(i, num_node_devices, shard_factor)) + replicate_group = dist.new_group(replicate_group_ranks) + if rank in replicate_group_ranks: + current_replicate_group = replicate_group + +To run the above code snippet, we can leverage PyTorch Elastic. Let's create a file named ``2d_setup.py``. +Then, run the following `torch elastic/torchrun `__ command. .. code-block:: python -torchrun --nnodes=1 --nproc_per_node=8 --rdzv_id=100 --rdzv_endpoint=localhost:29400 2d_setup.py + torchrun --nnodes=1 --nproc_per_node=8 --rdzv_id=100 --rdzv_endpoint=localhost:29400 2d_setup.py -.. With the help of :func:`init_device_mesh`, we can accomplish the above 2D setup in just 2 lines. +With the help of :func:`init_device_mesh`, we can accomplish the above 2D setup in just two lines. .. code-block:: python -from torch.distributed.device_mesh import init_device_mesh -device_mesh = init_device_mesh("cuda", (2, 4)) + from torch.distributed.device_mesh import init_device_mesh + device_mesh = init_device_mesh("cuda", (2, 4)) -.. Let's create a file named ``2d_setup_with_device_mesh.py``. -.. Then, run the following `torch elastic/torchrun `__ command. +Let's create a file named ``2d_setup_with_device_mesh.py``. +Then, run the following `torch elastic/torchrun `__ command. .. 
code-block:: python -torchrun --nnodes=1 --nproc_per_node=8 --rdzv_id=100 --rdzv_endpoint=localhost:29400 2d_setup_with_device_mesh.py + torchrun --nnodes=1 --nproc_per_node=8 --rdzv_id=100 --rdzv_endpoint=localhost:29400 2d_setup_with_device_mesh.py How to use DeviceMesh with HSDP ------------------------------- -Hybrid Sharding(HSDP) -Let's see an example of how DeviceMesh can assist with applying Hybrid Sharding strategy to your model. +Hybrid Sharding Data Parallel(HSDP) is 2D strategy to perform FSDP within a host and DDP across hosts. + +Let's see an example of how DeviceMesh can assist with applying HSDP to your model. With DeviceMesh, +users would not need to manually create and manage shard group and replicate group. .. code-block:: python -import torch -import torch.nn as nn + import torch + import torch.nn as nn -from torch.distributed.device_mesh import init_device_mesh -from torch.distributed.fsdp import FullyShardedDataParallel as FSDP, ShardingStrategy + from torch.distributed.device_mesh import init_device_mesh + from torch.distributed.fsdp import FullyShardedDataParallel as FSDP, ShardingStrategy -class ToyModel(nn.Module): - def __init__(self): - super(ToyModel, self).__init__() - self.net1 = nn.Linear(10, 10) - self.relu = nn.ReLU() - self.net2 = nn.Linear(10, 5) + class ToyModel(nn.Module): + def __init__(self): + super(ToyModel, self).__init__() + self.net1 = nn.Linear(10, 10) + self.relu = nn.ReLU() + self.net2 = nn.Linear(10, 5) - def forward(self, x): - return self.net2(self.relu(self.net1(x))) + def forward(self, x): + return self.net2(self.relu(self.net1(x))) -# HSDP: MeshShape(2, 4) -mesh_2d = init_device_mesh("cuda", (2, 4)) -model = FSDP( - ToyModel(), device_mesh=mesh_2d, sharding_strategy=ShardingStrategy.HYBRID_SHARD -) + # HSDP: MeshShape(2, 4) + mesh_2d = init_device_mesh("cuda", (2, 4)) + model = FSDP( + ToyModel(), device_mesh=mesh_2d, sharding_strategy=ShardingStrategy.HYBRID_SHARD + ) -.. Let's create a file named ``hsdp.py``. 
-.. Then, run the following `torch elastic/torchrun `__ command. +Let's create a file named ``hsdp.py``. +Then, run the following `torch elastic/torchrun `__ command. .. code-block:: python -torchrun --nnodes=1 --nproc_per_node=8 --rdzv_id=100 --rdzv_endpoint=localhost:29400 hsdp.py + torchrun --nnodes=1 --nproc_per_node=8 --rdzv_id=100 --rdzv_endpoint=localhost:29400 hsdp.py Conclusion ---------- -.. In conclusion, we have learned about :class:`DeviceMesh` and :func:`init_device_mesh`, as well as how -.. they can be used to describe the layout of devices across the cluster. +In conclusion, we have learned about :class:`DeviceMesh` and :func:`init_device_mesh`, as well as how +they can be used to describe the layout of devices across the cluster. -.. For more information, please see the following: +For more information, please see the following: - `2D parallel combining Tensor/Sequance Parallel with FSDP `__ +- `Composable PyTorch Distributed with PT2 `__ From 1102397384be90d8f40cabb3eae87ec202ce7aeb Mon Sep 17 00:00:00 2001 From: wz337 Date: Wed, 20 Dec 2023 14:25:47 -0800 Subject: [PATCH 04/10] 1. add to why DM is useful, 2. add get PG, 3. add note --- recipes_source/distributed_device_mesh.rst | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/recipes_source/distributed_device_mesh.rst b/recipes_source/distributed_device_mesh.rst index 8512f7def46..b55ef74c81f 100644 --- a/recipes_source/distributed_device_mesh.rst +++ b/recipes_source/distributed_device_mesh.rst @@ -30,8 +30,12 @@ Users can also easily manage the underlying process_groups/devices for multi-dim Why DeviceMesh is Useful ------------------------ +DeviceMesh is useful, when composability is requried. That is when your parallelism solutions require both communication across hosts and within each host. 
+The image above shows that we can create a 2D mesh that connects the devices within each host, and connects each device with its counterpart on the other hosts in a homogenous setup. -The following code snippet illustrates a 2D setup without :class:`DeviceMesh`. First, we need to manually calculate the shard group and replicate group. Then, we need to assign the correct shard and +Without DeviceMesh, users would need to manually set up NCCL communicators before applying any parallelism. +The following code snippet illustrates a hybrid sharding 2-D Parallel pattern setup without :class:`DeviceMesh`. +First, we need to manually calculate the shard group and replicate group. Then, we need to assign the correct shard and replicate group to each rank. .. code-block:: python @@ -76,13 +80,21 @@ Then, run the following `torch elastic/torchrun `__ command. From 42592fbfd308bc0ddea1254e19a9ac3939435c15 Mon Sep 17 00:00:00 2001 From: wz337 Date: Wed, 20 Dec 2023 16:32:45 -0800 Subject: [PATCH 05/10] address wanchao's comments --- distributed/home.rst | 20 +++++++++++++++++++- recipes_source/distributed_device_mesh.rst | 18 +++++++++--------- recipes_source/recipes_index.rst | 14 +++++++------- 3 files changed, 35 insertions(+), 17 deletions(-) diff --git a/distributed/home.rst b/distributed/home.rst index aac2a1df494..e80c4e95fbe 100644 --- a/distributed/home.rst +++ b/distributed/home.rst @@ -13,6 +13,7 @@ PyTorch with each method having their advantages in certain use cases: * `DistributedDataParallel (DDP) <#learn-ddp>`__ * `Fully Sharded Data Parallel (FSDP) <#learn-fsdp>`__ +* `Device Mesh <#device-mesh>`__ * `Remote Procedure Call (RPC) distributed training <#learn-rpc>`__ * `Custom Extensions <#custom-extensions>`__ @@ -51,7 +52,7 @@ Learn DDP :link: https://pytorch.org/tutorials/advanced/generic_join.html?utm_source=distr_landing&utm_medium=generic_join :link-type: url - This tutorial describes the Join context manager and + This tutorial describes the Join context 
manager and demonstrates it's use with DistributedData Parallel. +++ :octicon:`code;1em` Code @@ -83,6 +84,23 @@ Learn FSDP +++ :octicon:`code;1em` Code +.. _device-mesh: + +Learn DeviceMesh +---------------- + +.. grid:: 3 + + .. grid-item-card:: :octicon:`file-code;1em` + Getting Started with DeviceMesh + :link: https://pytorch.org/tutorials/recipes/distributed_device_mesh.html?highlight=devicemesh + :link-type: url + + In this tutorial you will learn to implement about `DeviceMesh` + and how it can help with distributed training. + +++ + :octicon:`code;1em` Code + .. _learn-rpc: Learn RPC diff --git a/recipes_source/distributed_device_mesh.rst b/recipes_source/distributed_device_mesh.rst index b55ef74c81f..8e4bcccef8e 100644 --- a/recipes_source/distributed_device_mesh.rst +++ b/recipes_source/distributed_device_mesh.rst @@ -30,10 +30,10 @@ Users can also easily manage the underlying process_groups/devices for multi-dim Why DeviceMesh is Useful ------------------------ -DeviceMesh is useful, when composability is requried. That is when your parallelism solutions require both communication across hosts and within each host. +DeviceMesh is useful when working with multi-dimensional parallelism (e.g. 3-D parallel) where parallelism composability is required. For example, when your parallelism solutions require both communication across hosts and within each host. The image above shows that we can create a 2D mesh that connects the devices within each host, and connects each device with its counterpart on the other hosts in a homogenous setup. -Without DeviceMesh, users would need to manually set up NCCL communicators before applying any parallelism. +Without DeviceMesh, users would need to manually set up NCCL communicators, CUDA devices on each process before applying any parallelism, which could be quite complicated. The following code snippet illustrates a hybrid sharding 2-D Parallel pattern setup without :class:`DeviceMesh`.
First, we need to manually calculate the shard group and replicate group. Then, we need to assign the correct shard and replicate group to each rank. @@ -51,6 +51,7 @@ replicate group to each rank. # Create process groups to manage 2-D like parallel pattern dist.init_process_group("nccl") + torch.cuda.set_device(rank) # Create shard groups (e.g. (0, 1, 2, 3), (4, 5, 6, 7)) # and assign the correct shard group to each rank @@ -78,11 +79,10 @@ To run the above code snippet, we can leverage PyTorch Elastic. Let's create a f Then, run the following `torch elastic/torchrun `__ command. .. code-block:: python - torchrun --nnodes=1 --nproc_per_node=8 --rdzv_id=100 --rdzv_endpoint=localhost:29400 2d_setup.py + torchrun --nproc_per_node=8 --rdzv_id=100 --rdzv_endpoint=localhost:29400 2d_setup.py -Note - -For simplicity of demonstration, we are simulating 2D parallel using only one node. Note that this code snippet can also be used when running on multi hosts setup. +.. note:: + For simplicity of demonstration, we are simulating 2D parallel using only one node. Note that this code snippet can also be used when running on a multi-host setup. With the help of :func:`init_device_mesh`, we can accomplish the above 2D setup in just two lines, and we can still access the underlying :class:`ProcessGroup` if needed. @@ -100,7 +100,7 @@ Let's create a file named ``2d_setup_with_device_mesh.py``. Then, run the following `torch elastic/torchrun `__ command. .. code-block:: python - torchrun --nnodes=1 --nproc_per_node=8 --rdzv_id=100 --rdzv_endpoint=localhost:29400 2d_setup_with_device_mesh.py + torchrun --nproc_per_node=8 2d_setup_with_device_mesh.py How to use DeviceMesh with HSDP @@ -108,7 +108,7 @@ How to use DeviceMesh with HSDP Hybrid Sharding Data Parallel(HSDP) is 2D strategy to perform FSDP within a host and DDP across hosts. -Let's see an example of how DeviceMesh can assist with applying HSDP to your model.
With DeviceMesh, +Let's see an example of how DeviceMesh can assist with applying HSDP to your model with a simple setup. With DeviceMesh, users would not need to manually create and manage shard group and replicate group. .. code-block:: python @@ -140,7 +140,7 @@ Let's create a file named ``hsdp.py``. Then, run the following `torch elastic/torchrun `__ command. .. code-block:: python - torchrun --nnodes=1 --nproc_per_node=8 --rdzv_id=100 --rdzv_endpoint=localhost:29400 hsdp.py + torchrun --nproc_per_node=8 hsdp.py Conclusion ---------- diff --git a/recipes_source/recipes_index.rst b/recipes_source/recipes_index.rst index c6912efedfc..9860fd5926d 100644 --- a/recipes_source/recipes_index.rst +++ b/recipes_source/recipes_index.rst @@ -296,6 +296,13 @@ Recipes are bite-sized, actionable examples of how to use specific PyTorch featu .. Distributed Training +.. customcarditem:: + :header: Getting Started with DeviceMesh + :card_description: Learn how to use DeviceMesh + :image: ../_static/img/thumbnails/cropped/profiler.png + :link: ../recipes/distributed_device_mesh.html + :tags: Distributed-Training + .. customcarditem:: :header: Shard Optimizer States with ZeroRedundancyOptimizer :card_description: How to use ZeroRedundancyOptimizer to reduce memory consumption. @@ -324,13 +331,6 @@ Recipes are bite-sized, actionable examples of how to use specific PyTorch featu :link: ../recipes/DCP_tutorial.html :tags: Distributed-Training - .. customcarditem:: - :header: Getting Started with DeviceMesh - :card_description: Learn how to use DeviceMesh - :image: ../_static/img/thumbnails/cropped/profiler.png - :link: ../recipes/distributed_device_mesh.html - :tags: Distributed-Training - .. TorchServe .. 
customcarditem:: From 0cffa7ef6edec95b26e57ee53d3e5857c2769dde Mon Sep 17 00:00:00 2001 From: Iris Z <31293777+wz337@users.noreply.github.com> Date: Wed, 20 Dec 2023 19:40:44 -0800 Subject: [PATCH 06/10] Update recipes_source/distributed_device_mesh.rst Co-authored-by: Wanchao --- recipes_source/distributed_device_mesh.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes_source/distributed_device_mesh.rst b/recipes_source/distributed_device_mesh.rst index 8e4bcccef8e..37f79ab7075 100644 --- a/recipes_source/distributed_device_mesh.rst +++ b/recipes_source/distributed_device_mesh.rst @@ -13,7 +13,7 @@ Prerequisites: - PyTorch 2.2 -Setting up the NVIDIA Collective Communication Library (NCCL) communicators for distributed communication during distributed training can pose a significant challenge. For workloads where users need to compose different parallelisms, +Setting up distributed communicators, i.e. NVIDIA Collective Communication Library (NCCL) communicators for distributed training can pose a significant challenge. For workloads where users need to compose different parallelisms, users would need to manually set up and manage NCCL communicators (for example, :class:`ProcessGroup`) for each parallelism solutions. This process could be complicated and susceptible to errors. :class:`DeviceMesh` can simplify this process, making it more manageable and less prone to errors. 
From a3dff623c6315e9c3569fea834aa969edef97a1f Mon Sep 17 00:00:00 2001 From: wz337 Date: Wed, 20 Dec 2023 19:42:36 -0800 Subject: [PATCH 07/10] address comments 2 --- distributed/home.rst | 2 +- recipes_source/distributed_device_mesh.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/distributed/home.rst b/distributed/home.rst index e80c4e95fbe..aa3d56fb834 100644 --- a/distributed/home.rst +++ b/distributed/home.rst @@ -96,7 +96,7 @@ Learn DeviceMesh :link: https://pytorch.org/tutorials/recipes/distributed_device_mesh.html?highlight=devicemesh :link-type: url - In this tutorial you will learn to implement about `DeviceMesh` + In this tutorial you will learn to about `DeviceMesh` and how it can help with distributed training. +++ :octicon:`code;1em` Code diff --git a/recipes_source/distributed_device_mesh.rst b/recipes_source/distributed_device_mesh.rst index 37f79ab7075..bd71c994279 100644 --- a/recipes_source/distributed_device_mesh.rst +++ b/recipes_source/distributed_device_mesh.rst @@ -13,7 +13,7 @@ Prerequisites: - PyTorch 2.2 -Setting up distributed communicators, i.e. NVIDIA Collective Communication Library (NCCL) communicators for distributed training can pose a significant challenge. For workloads where users need to compose different parallelisms, +Setting up distributed communicators, i.e. NVIDIA Collective Communication Library (NCCL) communicators, for distributed training can pose a significant challenge. For workloads where users need to compose different parallelisms, users would need to manually set up and manage NCCL communicators (for example, :class:`ProcessGroup`) for each parallelism solutions. This process could be complicated and susceptible to errors. :class:`DeviceMesh` can simplify this process, making it more manageable and less prone to errors. 
From b20f5a3f095f5e41bc2a8b740767e047e6c7b88b Mon Sep 17 00:00:00 2001 From: Iris Z <31293777+wz337@users.noreply.github.com> Date: Thu, 21 Dec 2023 08:57:08 -0800 Subject: [PATCH 08/10] Update distributed_device_mesh.rst --- recipes_source/distributed_device_mesh.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes_source/distributed_device_mesh.rst b/recipes_source/distributed_device_mesh.rst index bd71c994279..3488f07abd3 100644 --- a/recipes_source/distributed_device_mesh.rst +++ b/recipes_source/distributed_device_mesh.rst @@ -9,7 +9,7 @@ Getting Started with DeviceMesh Prerequisites: - `Distributed Communication Package - torch.distributed `__ -- Python +- Python 3.8 - 3.11 - PyTorch 2.2 From 08a9a4050ee6aaf8ec299a48bcac36834c10fb3b Mon Sep 17 00:00:00 2001 From: Iris Z <31293777+wz337@users.noreply.github.com> Date: Thu, 21 Dec 2023 08:57:56 -0800 Subject: [PATCH 09/10] Update distributed_device_mesh.rst --- recipes_source/distributed_device_mesh.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/recipes_source/distributed_device_mesh.rst b/recipes_source/distributed_device_mesh.rst index 3488f07abd3..ded1ecd4e99 100644 --- a/recipes_source/distributed_device_mesh.rst +++ b/recipes_source/distributed_device_mesh.rst @@ -39,6 +39,7 @@ First, we need to manually calculate the shard group and replicate group. Then, replicate group to each rank. .. code-block:: python + import os import torch @@ -79,6 +80,7 @@ To run the above code snippet, we can leverage PyTorch Elastic. Let's create a f Then, run the following `torch elastic/torchrun `__ command. .. code-block:: python + torchrun --nproc_per_node=8 --rdzv_id=100 --rdzv_endpoint=localhost:29400 2d_setup.py .. note:: @@ -89,6 +91,7 @@ access the underlying :class:`ProcessGroup` if needed. .. 
code-block:: python + from torch.distributed.device_mesh import init_device_mesh mesh_2d = init_device_mesh("cuda", (2, 4), mesh_dim_names=("replicate", "shard")) @@ -100,6 +103,7 @@ Let's create a file named ``2d_setup_with_device_mesh.py``. Then, run the following `torch elastic/torchrun `__ command. .. code-block:: python + torchrun --nproc_per_node=8 2d_setup_with_device_mesh.py @@ -112,6 +116,7 @@ Let's see an example of how DeviceMesh can assist with applying HSDP to your mod users would not need to manually create and manage shard group and replicate group. .. code-block:: python + import torch import torch.nn as nn @@ -140,6 +145,7 @@ Let's create a file named ``hsdp.py``. Then, run the following `torch elastic/torchrun `__ command. .. code-block:: python + torchrun --nproc_per_node=8 hsdp.py Conclusion From 3ba947eed17728be04b59a04009dd568fecb9762 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Thu, 21 Dec 2023 09:14:48 -0800 Subject: [PATCH 10/10] Update distributed/home.rst --- distributed/home.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distributed/home.rst b/distributed/home.rst index aa3d56fb834..ff0dbf73e5a 100644 --- a/distributed/home.rst +++ b/distributed/home.rst @@ -96,7 +96,7 @@ Learn DeviceMesh :link: https://pytorch.org/tutorials/recipes/distributed_device_mesh.html?highlight=devicemesh :link-type: url - In this tutorial you will learn to about `DeviceMesh` + In this tutorial you will learn about `DeviceMesh` and how it can help with distributed training. +++ :octicon:`code;1em` Code