incorporate some of @jstac comments

mmcky · mmcky · commit cdf794dfa314 · 2024-03-12T15:48:57.000+11:00
diff --git a/lectures/_static/lecture_specific/inequality/usa-gini-nwealth-tincome-lincome.csv b/lectures/_static/lecture_specific/inequality/usa-gini-nwealth-tincome-lincome.csv
@@ -1,21 +1,21 @@
 year,n_wealth,t_income,l_income
-1950,0.8257332034366344,0.44248654139458693,0.5342948198773417
-1953,0.8059487586599325,0.42645440609359514,0.5158978980963705
-1956,0.8121790488050629,0.4442694287339931,0.5349293526208135
-1959,0.7952068741637919,0.4374934807706156,0.5213985948309419
-1962,0.8086945076579375,0.4435843103853644,0.5345127915054356
-1965,0.7904149225687939,0.43763715466663433,0.7487860020887759
-1968,0.7982885066993506,0.42086207944389026,0.5242396427381537
-1971,0.7911574835420256,0.42333442460902515,0.5576454812313486
-1977,0.7571418922185218,0.46187678800902543,0.5704448110072071
-1983,0.749433540064304,0.43934561846446973,0.5662220844385909
-1989,0.7715705301674298,0.51152495816542,0.6013995687471444
-1992,0.7508126614055317,0.4740650672076807,0.5983592657979544
-1995,0.7569492388110282,0.48965523558400603,0.596977951671693
-1998,0.7603291991801175,0.4911744158516888,0.5774462841723299
-2001,0.7816118750507037,0.5239092994681126,0.6042739644967319
-2004,0.7700355469522371,0.48843503839032426,0.5981432201792735
-2007,0.782141377648699,0.5197156312086187,0.6263452195753223
-2010,0.8250825295193419,0.5195972120145633,0.6453653328291933
-2013,0.8227698931835327,0.5314001749843346,0.6498682917772663
-2016,0.8342975903562247,0.5541400068900854,0.670684679337527
+1950,0.8257332034366353,0.4424865413945867,0.5342948198773424
+1953,0.8059487586599343,0.42645440609359475,0.5158978980963699
+1956,0.8121790488050622,0.4442694287339929,0.5349293526208142
+1959,0.7952068741637924,0.43749348077061573,0.5213985948309421
+1962,0.8086945076579368,0.4435843103853639,0.5345127915054342
+1965,0.790414922568795,0.43763715466663367,0.7487860020887751
+1968,0.7982885066993514,0.42086207944388976,0.5242396427381543
+1971,0.7911574835420238,0.4233344246090258,0.5576454812313485
+1977,0.7571418922185226,0.461876788009026,0.5704448110072052
+1983,0.7494335400643025,0.4393456184644705,0.5662220844385915
+1989,0.7715705301674318,0.511524958165423,0.6013995687471408
+1992,0.7508126614055309,0.4740650672076755,0.5983592657979545
+1995,0.7569492388110265,0.4896552355840044,0.5969779516716882
+1998,0.760329199180118,0.4911744158516898,0.5774462841723345
+2001,0.7816118750507034,0.5239092994681134,0.6042739644967283
+2004,0.7700355469522369,0.4884350383903255,0.5981432201792665
+2007,0.7821413776486987,0.5197156312086179,0.6263452195753251
+2010,0.825082529519343,0.5195972120145644,0.6453653328291921
+2013,0.8227698931835268,0.5314001749843339,0.6498682917772639
+2016,0.8342975903562239,0.5541400068900838,0.6706846793375301
diff --git a/lectures/inequality.md b/lectures/inequality.md
@@ -102,18 +102,18 @@ The curve $L$ is just a function $y = L(x)$ that we can plot and interpret.
 
 To create it we first generate data points $(x_i, y_i)$  according to
 
+```{prf:definition}
 $$
 x_i = \frac{i}{n},
 \qquad
 y_i = \frac{\sum_{j \leq i} w_j}{\sum_{j \leq n} w_j},
 \qquad i = 1, \ldots, n
 $$
+```
 
 Now the Lorenz curve $L$ is formed from these data points using interpolation.
 
-```{tip}
 If we use a line plot in `matplotlib`, the interpolation will be done for us.
-```
 
 The meaning of the statement $y = L(x)$ is that the lowest $(100
 \times x)$\% of people have $(100 \times y)$\% of all wealth.
@@ -337,11 +337,15 @@ smallest to largest.
 
 The Gini coefficient is defined for the sample above as 
 
+```{prf:definition}
+:label: define-gini
+
 $$
 G :=
 \frac{\sum_{i=1}^n \sum_{j = 1}^n |w_j - w_i|}
      {2n\sum_{i=1}^n w_i}.
-$$ (eq:gini)
+$$
+```
 
 The Gini coefficient is closely related to the Lorenz curve.
 
@@ -529,6 +533,7 @@ Let us fetch the data for the USA and request for it to be returned as a `DataFr
 ```{code-cell} ipython3
 data = wb.data.DataFrame("SI.POV.GINI", "USA")
 data.head(n=5)
+data.columns = data.columns.map(lambda x: int(x.replace('YR',''))) # remove 'YR' from the column labels and convert to int
 ```
 
 ```{tip}
@@ -559,8 +564,9 @@ plt.show()
 
 As can be seen in {numref}`gini_usa1` the Gini coefficient:
 
-1. moves slowly over time, and 
-2. does not have significant variation in the full range from 0 to 100.
+1. trended upward from 1980 to 2020 and then dropped slightly following the COVID pandemic
+2. moves slowly over time
+3. does not have significant variation in the full range from 0 to 100
 
 Using `pandas` we can take a quick look across all countries and all years in the World Bank dataset. 
 
@@ -569,6 +575,7 @@ By leaving off the `"USA"` this function returns all Gini data that is available
 ```{code-cell} ipython3
 # Fetch gini data for all countries
 gini_all = wb.data.DataFrame("SI.POV.GINI")
+gini_all.columns = gini_all.columns.map(lambda x: int(x.replace('YR',''))) # remove 'YR' from the column labels and convert to int
 
 # Create a long series with a multi-index of the data to get global min and max values
 gini_all = gini_all.unstack(level='economy').dropna()
@@ -588,7 +595,6 @@ Let us zoom in a little on the US data and add some trendlines.
 {numref}`gini_usa1` suggests there is a change in trend around the year 1981
 
 ```{code-cell} ipython3
-data_usa.index = data_usa.index.map(lambda x: int(x.replace('YR',''))) # remove 'YR' in index and convert to int
 # Use pandas filters to find data before 1981
 pre_1981 = data_usa[data_usa.index <= 1981]
 # Use pandas filters to find data after 1981
@@ -808,7 +814,9 @@ Let's take another look at the USA, Norway, and the United Kingdom.
 
 ```{code-cell} ipython3
 countries = ['USA', 'NOR', 'GBR']
-gdppc = wb.data.DataFrame("NY.GDP.PCAP.KD", countries).T
+gdppc = wb.data.DataFrame("NY.GDP.PCAP.KD", countries)
+gdppc.columns = gdppc.columns.map(lambda x: int(x.replace('YR',''))) # remove 'YR' from the column labels and convert to int
+gdppc = gdppc.T
 ```
 
 We can rearrange the data so that we can plot gdp per capita and the Gini coefficient across years
@@ -829,12 +837,6 @@ plot_data = plot_data.merge(pgdppc, left_index=True, right_index=True)
 plot_data.reset_index(inplace=True)
 ```
 
-We will transform the year column to remove the 'YR' text and return an integer.
-
-```{code-cell} ipython3
-plot_data.year = plot_data.year.map(lambda x: int(x.replace('YR','')))
-```
-
 Now using plotly to build a plot with gdp per capita on the y-axis and the Gini coefficient on the x-axis.
 
 ```{code-cell} ipython3
@@ -853,6 +855,7 @@ plot_data.year = plot_data.year.map(lambda x: x if x in labels else None)
 ```
 
 (fig:plotly-gini-gdppc-years)=
+
 ```{code-cell} ipython3
 fig = px.line(plot_data, 
               x = "gini",