From 414c8d825cd82a6fc39bcea5308a38f124f9e96d Mon Sep 17 00:00:00 2001 From: Michael J Ward Date: Sat, 10 Mar 2018 13:40:01 -0600 Subject: [PATCH 01/10] Added summary to `DataFrame.combine`. Corrected the extended summary. Added descriptions to parameters. Added examples to demonstrate quirks in usage. --- pandas/core/frame.py | 79 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 71 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a66d00fff9714..2606e4785237c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4049,19 +4049,27 @@ def _compare(a, b): def combine(self, other, func, fill_value=None, overwrite=True): """ - Add two DataFrame objects and do not propagate NaN values, so if for a - (column, time) one frame is missing a value, it will default to the - other frame's value (which might be NaN as well) + Combine with `other` DataFrame using `func` to merge columns. + + Combines `self` DataFrame with `other` DataFrame using `func` + to merge columns. The row and column indexes of the resulting + DataFrame will be the union of the two. If `fill_value` is + specified, that value will be filled prior to the call to + `func`. If `overwrite` is `False`, columns in `self` that + do not exist in `other` will be preserved. 
Parameters ---------- other : DataFrame func : function Function that takes two series as inputs and return a Series or a - scalar - fill_value : scalar value + scalar, used to merge the two dataframes column by columns + fill_value : scalar value, default None + The value to fill NaNs with prior to passing any column to the + merge func overwrite : boolean, default True - If True then overwrite values for common keys in the calling frame + If True, columns in `self` that do not exist in `other` will be + overwritten with NaNs Returns ------- @@ -4069,13 +4077,69 @@ def combine(self, other, func, fill_value=None, overwrite=True): Examples -------- + Combine using a simple function that chooses the smaller column. + >>> df1 = DataFrame({'A': [0, 0], 'B': [4, 4]}) >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]}) - >>> df1.combine(df2, lambda s1, s2: s1 if s1.sum() < s2.sum() else s2) + >>> take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2 + >>> df1.combine(df2, take_smaller) A B 0 0 3 1 0 3 + Using `fill_value` fills Nones prior to passing the column to the + merge function. + + >>> df1 = DataFrame({'A': [0, 0], 'B': [None, 4]}) + >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]}) + >>> take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2 + >>> df1.combine(df2, take_smaller, fill_value=-5) + A B + 0 0 -5 + 1 0 4 + + However, if the same element in both dataframes is None, that None + is preserved + + >>> df1 = DataFrame({'A': [0, 0], 'B': [None, 4]}) + >>> df2 = DataFrame({'A': [1, 1], 'B': [None, 3]}) + >>> take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2 + >>> df1.combine(df2, take_smaller, fill_value=-5) + A B + 0 0 NaN + 1 0 3 + + Example that demonstrates the use of `overwrite` and behavior when + the axis differ between the dataframes. 
+ + >>> df1 = DataFrame({'A': [0, 0], 'B': [4, 4]}) + >>> df2 = DataFrame({'B': [3, 3], 'C': [1, 1],}, index=[1, 2]) + >>> take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2 + >>> df1.combine(df2, take_smaller) + A B C + 0 NaN NaN NaN + 1 NaN 3.0 1.0 + 2 NaN 3.0 1.0 + + >>> df1.combine(df2, take_smaller, overwrite=False) + A B C + 0 0.0 NaN NaN + 1 0.0 3.0 1.0 + 2 NaN 3.0 1.0 + + Demonstrating the preference of the passed in dataframe. + >>> df2.combine(df1, take_smaller) + A B C + 0 0.0 NaN NaN + 1 0.0 3.0 NaN + 2 NaN 3.0 NaN + + >>> df2.combine(df1, take_smaller, overwrite=False) + A B C + 0 0.0 NaN NaN + 1 0.0 3.0 1.0 + 2 NaN 3.0 1.0 + See Also -------- DataFrame.combine_first : Combine two DataFrame objects and default to @@ -4095,7 +4159,6 @@ def combine(self, other, func, fill_value=None, overwrite=True): # sorts if possible new_columns = this.columns.union(other.columns) do_fill = fill_value is not None - result = {} for col in new_columns: series = this[col] From de101386a53e4817b994f145efb880eec3f845f7 Mon Sep 17 00:00:00 2001 From: Michael J Ward Date: Sat, 10 Mar 2018 13:40:01 -0600 Subject: [PATCH 02/10] Added summary to `DataFrame.combine`. Corrected the extended summary. Added descriptions to parameters. Added examples to demonstrate quirks in usage. --- pandas/core/frame.py | 79 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 71 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a66d00fff9714..2606e4785237c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4049,19 +4049,27 @@ def _compare(a, b): def combine(self, other, func, fill_value=None, overwrite=True): """ - Add two DataFrame objects and do not propagate NaN values, so if for a - (column, time) one frame is missing a value, it will default to the - other frame's value (which might be NaN as well) + Combine with `other` DataFrame using `func` to merge columns. 
+ + Combines `self` DataFrame with `other` DataFrame using `func` + to merge columns. The row and column indexes of the resulting + DataFrame will be the union of the two. If `fill_value` is + specified, that value will be filled prior to the call to + `func`. If `overwrite` is `False`, columns in `self` that + do not exist in `other` will be preserved. Parameters ---------- other : DataFrame func : function Function that takes two series as inputs and return a Series or a - scalar - fill_value : scalar value + scalar, used to merge the two dataframes column by columns + fill_value : scalar value, default None + The value to fill NaNs with prior to passing any column to the + merge func overwrite : boolean, default True - If True then overwrite values for common keys in the calling frame + If True, columns in `self` that do not exist in `other` will be + overwritten with NaNs Returns ------- @@ -4069,13 +4077,69 @@ def combine(self, other, func, fill_value=None, overwrite=True): Examples -------- + Combine using a simple function that chooses the smaller column. + >>> df1 = DataFrame({'A': [0, 0], 'B': [4, 4]}) >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]}) - >>> df1.combine(df2, lambda s1, s2: s1 if s1.sum() < s2.sum() else s2) + >>> take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2 + >>> df1.combine(df2, take_smaller) A B 0 0 3 1 0 3 + Using `fill_value` fills Nones prior to passing the column to the + merge function. 
+ + >>> df1 = DataFrame({'A': [0, 0], 'B': [None, 4]}) + >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]}) + >>> take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2 + >>> df1.combine(df2, take_smaller, fill_value=-5) + A B + 0 0 -5 + 1 0 4 + + However, if the same element in both dataframes is None, that None + is preserved + + >>> df1 = DataFrame({'A': [0, 0], 'B': [None, 4]}) + >>> df2 = DataFrame({'A': [1, 1], 'B': [None, 3]}) + >>> take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2 + >>> df1.combine(df2, take_smaller, fill_value=-5) + A B + 0 0 NaN + 1 0 3 + + Example that demonstrates the use of `overwrite` and behavior when + the axis differ between the dataframes. + + >>> df1 = DataFrame({'A': [0, 0], 'B': [4, 4]}) + >>> df2 = DataFrame({'B': [3, 3], 'C': [1, 1],}, index=[1, 2]) + >>> take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2 + >>> df1.combine(df2, take_smaller) + A B C + 0 NaN NaN NaN + 1 NaN 3.0 1.0 + 2 NaN 3.0 1.0 + + >>> df1.combine(df2, take_smaller, overwrite=False) + A B C + 0 0.0 NaN NaN + 1 0.0 3.0 1.0 + 2 NaN 3.0 1.0 + + Demonstrating the preference of the passed in dataframe. + >>> df2.combine(df1, take_smaller) + A B C + 0 0.0 NaN NaN + 1 0.0 3.0 NaN + 2 NaN 3.0 NaN + + >>> df2.combine(df1, take_smaller, overwrite=False) + A B C + 0 0.0 NaN NaN + 1 0.0 3.0 1.0 + 2 NaN 3.0 1.0 + See Also -------- DataFrame.combine_first : Combine two DataFrame objects and default to @@ -4095,7 +4159,6 @@ def combine(self, other, func, fill_value=None, overwrite=True): # sorts if possible new_columns = this.columns.union(other.columns) do_fill = fill_value is not None - result = {} for col in new_columns: series = this[col] From c2f4b563c8513ae4deea38d0e0f59a2423dd3c82 Mon Sep 17 00:00:00 2001 From: Michael J Ward Date: Sat, 10 Mar 2018 14:00:32 -0600 Subject: [PATCH 03/10] Added short summary to and added examples to demonstrate behavior. 
--- pandas/core/frame.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2606e4785237c..bb6c4da8193cd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4049,7 +4049,7 @@ def _compare(a, b): def combine(self, other, func, fill_value=None, overwrite=True): """ - Combine with `other` DataFrame using `func` to merge columns. + Perform series-wise combine with `other` DataFrame using given `func` Combines `self` DataFrame with `other` DataFrame using `func` to merge columns. The row and column indexes of the resulting @@ -4223,13 +4223,16 @@ def combine(self, other, func, fill_value=None, overwrite=True): def combine_first(self, other): """ - Combine two DataFrame objects and default to non-null values in frame - calling the method. Result index columns will be the union of the - respective indexes and columns + Update null values with elements from another DataFrame. + + Combine two DataFrame objects by filling null values in self DataFrame + with non-null values from other DataFrame. The row and column indexes of the resulting + DataFrame will be the union of the two. Parameters ---------- other : DataFrame + Provided DataFrame to use to fill null values Returns ------- @@ -4237,13 +4240,25 @@ def combine_first(self, other): Examples -------- + df1's values prioritized, use values from df2 to fill holes: - >>> df1 = pd.DataFrame([[1, np.nan]]) - >>> df2 = pd.DataFrame([[3, 4]]) + >>> df1 = DataFrame({'A': [None, 0], 'B': [None, 4]}) + >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]}) + >>> df1.combine_first(df2) + A B + 0 1 3 + 1 0 4 + + Illustrate the behavior when the axis differ between the dataframes. 
+ + >>> df1 = DataFrame({'A': [None, 0], 'B': [4, None]}) + >>> df2 = DataFrame({'B': [3, 3], 'C': [1, 1],}, index=[1, 2]) >>> df1.combine_first(df2) - 0 1 - 0 1 4.0 + A B C + 0 NaN 4.0 NaN + 1 0.0 3.0 1.0 + 2 NaN 3.0 1.0 See Also -------- From 596535eac8cea65b94f9b7fbcdd9ea376df9e74b Mon Sep 17 00:00:00 2001 From: Michael J Ward Date: Sat, 10 Mar 2018 14:06:21 -0600 Subject: [PATCH 04/10] pep8 formatting for the docstrings --- pandas/core/frame.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bb6c4da8193cd..1adbc2a0dc43b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4223,11 +4223,11 @@ def combine(self, other, func, fill_value=None, overwrite=True): def combine_first(self, other): """ - Update null values with elements from another DataFrame. + Update NaN values with value in the same location in `other` DataFrame Combine two DataFrame objects by filling null values in self DataFrame - with non-null values from other DataFrame. The row and column indexes of the resulting - DataFrame will be the union of the two. + with non-null values from other DataFrame. The row and column indexes + of the resulting DataFrame will be the union of the two. 
Parameters ---------- From ba7af38a966b47af9cff54c51ad690564c373f0b Mon Sep 17 00:00:00 2001 From: Michael J Ward Date: Sat, 10 Mar 2018 14:54:42 -0600 Subject: [PATCH 05/10] updated doctests so that they all pass for Dataframe.combine and Dataframe.combine_first --- pandas/core/frame.py | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1adbc2a0dc43b..68cf871d59f0e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4049,7 +4049,7 @@ def _compare(a, b): def combine(self, other, func, fill_value=None, overwrite=True): """ - Perform series-wise combine with `other` DataFrame using given `func` + Perform series-wise combine with `other` DataFrame using given `func`. Combines `self` DataFrame with `other` DataFrame using `func` to merge columns. The row and column indexes of the resulting @@ -4061,15 +4061,16 @@ def combine(self, other, func, fill_value=None, overwrite=True): Parameters ---------- other : DataFrame + The DataFrame to merge column-wise. func : function Function that takes two series as inputs and return a Series or a - scalar, used to merge the two dataframes column by columns + scalar, used to merge the two dataframes column by columns. fill_value : scalar value, default None The value to fill NaNs with prior to passing any column to the - merge func + merge func. overwrite : boolean, default True If True, columns in `self` that do not exist in `other` will be - overwritten with NaNs + overwritten with NaNs. Returns ------- @@ -4078,7 +4079,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): Examples -------- Combine using a simple function that chooses the smaller column. 
- + >>> from pandas import DataFrame >>> df1 = DataFrame({'A': [0, 0], 'B': [4, 4]}) >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]}) >>> take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2 @@ -4092,42 +4093,40 @@ def combine(self, other, func, fill_value=None, overwrite=True): >>> df1 = DataFrame({'A': [0, 0], 'B': [None, 4]}) >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]}) - >>> take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2 >>> df1.combine(df2, take_smaller, fill_value=-5) A B - 0 0 -5 - 1 0 4 + 0 0 -5.0 + 1 0 4.0 However, if the same element in both dataframes is None, that None is preserved >>> df1 = DataFrame({'A': [0, 0], 'B': [None, 4]}) >>> df2 = DataFrame({'A': [1, 1], 'B': [None, 3]}) - >>> take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2 >>> df1.combine(df2, take_smaller, fill_value=-5) A B 0 0 NaN - 1 0 3 + 1 0 3.0 Example that demonstrates the use of `overwrite` and behavior when the axis differ between the dataframes. >>> df1 = DataFrame({'A': [0, 0], 'B': [4, 4]}) - >>> df2 = DataFrame({'B': [3, 3], 'C': [1, 1],}, index=[1, 2]) - >>> take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2 + >>> df2 = DataFrame({'B': [3, 3], 'C': [-10, 1],}, index=[1, 2]) >>> df1.combine(df2, take_smaller) A B C 0 NaN NaN NaN - 1 NaN 3.0 1.0 + 1 NaN 3.0 -10.0 2 NaN 3.0 1.0 >>> df1.combine(df2, take_smaller, overwrite=False) A B C 0 0.0 NaN NaN - 1 0.0 3.0 1.0 + 1 0.0 3.0 -10.0 2 NaN 3.0 1.0 Demonstrating the preference of the passed in dataframe. + >>> df2 = DataFrame({'B': [3, 3], 'C': [1, 1],}, index=[1, 2]) >>> df2.combine(df1, take_smaller) A B C 0 0.0 NaN NaN @@ -4223,7 +4222,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): def combine_first(self, other): """ - Update NaN values with value in the same location in `other` DataFrame + Update null elements with value in the same location in `other`. 
Combine two DataFrame objects by filling null values in self DataFrame with non-null values from other DataFrame. The row and column indexes @@ -4232,7 +4231,7 @@ def combine_first(self, other): Parameters ---------- other : DataFrame - Provided DataFrame to use to fill null values + Provided DataFrame to use to fill null values. Returns ------- @@ -4242,13 +4241,13 @@ def combine_first(self, other): -------- df1's values prioritized, use values from df2 to fill holes: - + >>> from pandas import DataFrame >>> df1 = DataFrame({'A': [None, 0], 'B': [None, 4]}) >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]}) >>> df1.combine_first(df2) A B - 0 1 3 - 1 0 4 + 0 1.0 3.0 + 1 0.0 4.0 Illustrate the behavior when the axis differ between the dataframes. From f3b8051680c9bf5d74085cea5c727b4a26bd7b13 Mon Sep 17 00:00:00 2001 From: Michael J Ward Date: Sat, 10 Mar 2018 14:58:26 -0600 Subject: [PATCH 06/10] updated docstrings on DataFrame.combine and DataFrame.combine_first for proper HTML formatting. --- pandas/core/frame.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 68cf871d59f0e..b6a8ca9521712 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4079,6 +4079,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): Examples -------- Combine using a simple function that chooses the smaller column. + >>> from pandas import DataFrame >>> df1 = DataFrame({'A': [0, 0], 'B': [4, 4]}) >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]}) @@ -4126,6 +4127,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): 2 NaN 3.0 1.0 Demonstrating the preference of the passed in dataframe. 
+ >>> df2 = DataFrame({'B': [3, 3], 'C': [1, 1],}, index=[1, 2]) >>> df2.combine(df1, take_smaller) A B C @@ -4241,6 +4243,7 @@ def combine_first(self, other): -------- df1's values prioritized, use values from df2 to fill holes: + >>> from pandas import DataFrame >>> df1 = DataFrame({'A': [None, 0], 'B': [None, 4]}) >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]}) From 29833a3a659e16f158ce939e6107c870a2e10947 Mon Sep 17 00:00:00 2001 From: Michael J Ward Date: Sun, 11 Mar 2018 08:52:13 -0500 Subject: [PATCH 07/10] updated output alignment and removed term merge from combine docstring- addressing review comments --- pandas/bin/activate | 1 + pandas/bin/conda | 1 + pandas/bin/deactivate | 1 + pandas/core/frame.py | 46 +++++++++++++++++++++++++------------------ 4 files changed, 30 insertions(+), 19 deletions(-) create mode 120000 pandas/bin/activate create mode 120000 pandas/bin/conda create mode 120000 pandas/bin/deactivate diff --git a/pandas/bin/activate b/pandas/bin/activate new file mode 120000 index 0000000000000..e1e788b71d24a --- /dev/null +++ b/pandas/bin/activate @@ -0,0 +1 @@ +/home/henny-mac/miniconda3/bin/activate \ No newline at end of file diff --git a/pandas/bin/conda b/pandas/bin/conda new file mode 120000 index 0000000000000..73e416d3447af --- /dev/null +++ b/pandas/bin/conda @@ -0,0 +1 @@ +/home/henny-mac/miniconda3/bin/conda \ No newline at end of file diff --git a/pandas/bin/deactivate b/pandas/bin/deactivate new file mode 120000 index 0000000000000..4f9b1b25d9256 --- /dev/null +++ b/pandas/bin/deactivate @@ -0,0 +1 @@ +/home/henny-mac/miniconda3/bin/deactivate \ No newline at end of file diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b6a8ca9521712..2e429621897fa 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4052,11 +4052,8 @@ def combine(self, other, func, fill_value=None, overwrite=True): Perform series-wise combine with `other` DataFrame using given `func`. 
Combines `self` DataFrame with `other` DataFrame using `func` - to merge columns. The row and column indexes of the resulting - DataFrame will be the union of the two. If `fill_value` is - specified, that value will be filled prior to the call to - `func`. If `overwrite` is `False`, columns in `self` that - do not exist in `other` will be preserved. + to element-wise combine columns. The row and column indexes of the + resulting DataFrame will be the union of the two. Parameters ---------- @@ -4081,6 +4078,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): Combine using a simple function that chooses the smaller column. >>> from pandas import DataFrame + >>> import numpy as np >>> df1 = DataFrame({'A': [0, 0], 'B': [4, 4]}) >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]}) >>> take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2 @@ -4089,14 +4087,24 @@ def combine(self, other, func, fill_value=None, overwrite=True): 0 0 3 1 0 3 + Example using a true element-wise combine function. + + >>> import numpy as np + >>> df1 = DataFrame({'A': [5, 0], 'B': [2, 4]}) + >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]}) + >>> df1.combine(df2, np.minimum) + A B + 0 1 2 + 1 0 3 + Using `fill_value` fills Nones prior to passing the column to the merge function. 
>>> df1 = DataFrame({'A': [0, 0], 'B': [None, 4]}) >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]}) >>> df1.combine(df2, take_smaller, fill_value=-5) - A B - 0 0 -5.0 + A B + 0 0 -5.0 1 0 4.0 However, if the same element in both dataframes is None, that None @@ -4105,7 +4113,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): >>> df1 = DataFrame({'A': [0, 0], 'B': [None, 4]}) >>> df2 = DataFrame({'A': [1, 1], 'B': [None, 3]}) >>> df1.combine(df2, take_smaller, fill_value=-5) - A B + A B 0 0 NaN 1 0 3.0 @@ -4115,16 +4123,16 @@ def combine(self, other, func, fill_value=None, overwrite=True): >>> df1 = DataFrame({'A': [0, 0], 'B': [4, 4]}) >>> df2 = DataFrame({'B': [3, 3], 'C': [-10, 1],}, index=[1, 2]) >>> df1.combine(df2, take_smaller) - A B C - 0 NaN NaN NaN - 1 NaN 3.0 -10.0 - 2 NaN 3.0 1.0 + A B C + 0 NaN NaN NaN + 1 NaN 3.0 -10.0 + 2 NaN 3.0 1.0 >>> df1.combine(df2, take_smaller, overwrite=False) - A B C - 0 0.0 NaN NaN - 1 0.0 3.0 -10.0 - 2 NaN 3.0 1.0 + A B C + 0 0.0 NaN NaN + 1 0.0 3.0 -10.0 + 2 NaN 3.0 1.0 Demonstrating the preference of the passed in dataframe. 
@@ -4136,7 +4144,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): 2 NaN 3.0 NaN >>> df2.combine(df1, take_smaller, overwrite=False) - A B C + A B C 0 0.0 NaN NaN 1 0.0 3.0 1.0 2 NaN 3.0 1.0 @@ -4248,7 +4256,7 @@ def combine_first(self, other): >>> df1 = DataFrame({'A': [None, 0], 'B': [None, 4]}) >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]}) >>> df1.combine_first(df2) - A B + A B 0 1.0 3.0 1 0.0 4.0 @@ -4257,7 +4265,7 @@ def combine_first(self, other): >>> df1 = DataFrame({'A': [None, 0], 'B': [4, None]}) >>> df2 = DataFrame({'B': [3, 3], 'C': [1, 1],}, index=[1, 2]) >>> df1.combine_first(df2) - A B C + A B C 0 NaN 4.0 NaN 1 0.0 3.0 1.0 2 NaN 3.0 1.0 From 5cc5856945ed867f2142139bb8ead58f3888d675 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 7 Jul 2018 14:28:49 -0500 Subject: [PATCH 08/10] remove unneeded files and some edits --- pandas/bin/activate | 1 - pandas/bin/conda | 1 - pandas/bin/deactivate | 1 - pandas/core/frame.py | 13 +++++-------- 4 files changed, 5 insertions(+), 11 deletions(-) delete mode 120000 pandas/bin/activate delete mode 120000 pandas/bin/conda delete mode 120000 pandas/bin/deactivate diff --git a/pandas/bin/activate b/pandas/bin/activate deleted file mode 120000 index e1e788b71d24a..0000000000000 --- a/pandas/bin/activate +++ /dev/null @@ -1 +0,0 @@ -/home/henny-mac/miniconda3/bin/activate \ No newline at end of file diff --git a/pandas/bin/conda b/pandas/bin/conda deleted file mode 120000 index 73e416d3447af..0000000000000 --- a/pandas/bin/conda +++ /dev/null @@ -1 +0,0 @@ -/home/henny-mac/miniconda3/bin/conda \ No newline at end of file diff --git a/pandas/bin/deactivate b/pandas/bin/deactivate deleted file mode 120000 index 4f9b1b25d9256..0000000000000 --- a/pandas/bin/deactivate +++ /dev/null @@ -1 +0,0 @@ -/home/henny-mac/miniconda3/bin/deactivate \ No newline at end of file diff --git a/pandas/core/frame.py b/pandas/core/frame.py index aaab569126449..ad2c2d96cc676 100644 --- a/pandas/core/frame.py +++ 
b/pandas/core/frame.py @@ -4894,7 +4894,7 @@ def _compare(a, b): def combine(self, other, func, fill_value=None, overwrite=True): """ - Perform series-wise combine with `other` DataFrame using given `func`. + Perform column-wise combine with another DataFrame based on a passed function. Combines `self` DataFrame with `other` DataFrame using `func` to element-wise combine columns. The row and column indexes of the @@ -4906,7 +4906,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): The DataFrame to merge column-wise. func : function Function that takes two series as inputs and return a Series or a - scalar, used to merge the two dataframes column by columns. + scalar. Used to merge the two dataframes column by column. fill_value : scalar value, default None The value to fill NaNs with prior to passing any column to the merge func. @@ -4922,19 +4922,16 @@ def combine(self, other, func, fill_value=None, overwrite=True): -------- Combine using a simple function that chooses the smaller column. - >>> from pandas import DataFrame - >>> import numpy as np - >>> df1 = DataFrame({'A': [0, 0], 'B': [4, 4]}) - >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]}) + >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [4, 4]}) + >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]}) >>> take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2 >>> df1.combine(df2, take_smaller) A B 0 0 3 1 0 3 - Example using a true element-wise combine function. + Example using a true element-wise combine function. 
- >>> import numpy as np >>> df1 = DataFrame({'A': [5, 0], 'B': [2, 4]}) >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]}) >>> df1.combine(df2, np.minimum) From 1c7aff9cffcad9430029e4c9f1590d23ff893819 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 7 Jul 2018 14:31:37 -0500 Subject: [PATCH 09/10] forgot some pd --- pandas/core/frame.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ad2c2d96cc676..079070503f229 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4932,8 +4932,8 @@ def combine(self, other, func, fill_value=None, overwrite=True): Example using a true element-wise combine function. - >>> df1 = DataFrame({'A': [5, 0], 'B': [2, 4]}) - >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]}) + >>> df1 = pd.DataFrame({'A': [5, 0], 'B': [2, 4]}) + >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]}) >>> df1.combine(df2, np.minimum) A B 0 1 2 @@ -4942,7 +4942,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): Using `fill_value` fills Nones prior to passing the column to the merge function. - >>> df1 = DataFrame({'A': [0, 0], 'B': [None, 4]}) + >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [None, 4]}) >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]}) >>> df1.combine(df2, take_smaller, fill_value=-5) A B @@ -4952,8 +4952,8 @@ def combine(self, other, func, fill_value=None, overwrite=True): However, if the same element in both dataframes is None, that None is preserved - >>> df1 = DataFrame({'A': [0, 0], 'B': [None, 4]}) - >>> df2 = DataFrame({'A': [1, 1], 'B': [None, 3]}) + >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [None, 4]}) + >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [None, 3]}) >>> df1.combine(df2, take_smaller, fill_value=-5) A B 0 0 NaN @@ -4962,8 +4962,8 @@ def combine(self, other, func, fill_value=None, overwrite=True): Example that demonstrates the use of `overwrite` and behavior when the axis differ between the dataframes. 
- >>> df1 = DataFrame({'A': [0, 0], 'B': [4, 4]}) - >>> df2 = DataFrame({'B': [3, 3], 'C': [-10, 1],}, index=[1, 2]) + >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [4, 4]}) + >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [-10, 1],}, index=[1, 2]) >>> df1.combine(df2, take_smaller) A B C 0 NaN NaN NaN @@ -4978,7 +4978,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): Demonstrating the preference of the passed in dataframe. - >>> df2 = DataFrame({'B': [3, 3], 'C': [1, 1],}, index=[1, 2]) + >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [1, 1],}, index=[1, 2]) >>> df2.combine(df1, take_smaller) A B C 0 0.0 NaN NaN From fbc3207c0313b8efe91f60bd4b43393ccb8aafef Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 7 Jul 2018 14:41:11 -0500 Subject: [PATCH 10/10] flake8 and edit combine_first --- pandas/core/frame.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 079070503f229..897fa8dad61e6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4894,9 +4894,10 @@ def _compare(a, b): def combine(self, other, func, fill_value=None, overwrite=True): """ - Perform column-wise combine with another DataFrame based on a passed function. + Perform column-wise combine with another DataFrame based on a + passed function. - Combines `self` DataFrame with `other` DataFrame using `func` + Combines a DataFrame with `other` DataFrame using `func` to element-wise combine columns. The row and column indexes of the resulting DataFrame will be the union of the two. @@ -4943,7 +4944,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): merge function. 
>>> df1 = pd.DataFrame({'A': [0, 0], 'B': [None, 4]}) - >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]}) + >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]}) >>> df1.combine(df2, take_smaller, fill_value=-5) A B 0 0 -5.0 @@ -5063,7 +5064,7 @@ def combine_first(self, other): """ Update null elements with value in the same location in `other`. - Combine two DataFrame objects by filling null values in self DataFrame + Combine two DataFrame objects by filling null values in one DataFrame with non-null values from other DataFrame. The row and column indexes of the resulting DataFrame will be the union of the two. @@ -5079,20 +5080,18 @@ def combine_first(self, other): Examples -------- - df1's values prioritized, use values from df2 to fill holes: - - >>> from pandas import DataFrame - >>> df1 = DataFrame({'A': [None, 0], 'B': [None, 4]}) - >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]}) + >>> df1 = pd.DataFrame({'A': [None, 0], 'B': [None, 4]}) + >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]}) >>> df1.combine_first(df2) A B 0 1.0 3.0 1 0.0 4.0 - Illustrate the behavior when the axis differ between the dataframes. + Null values still persist if the location of that null value + does not exist in `other`. - >>> df1 = DataFrame({'A': [None, 0], 'B': [4, None]}) - >>> df2 = DataFrame({'B': [3, 3], 'C': [1, 1],}, index=[1, 2]) + >>> df1 = pd.DataFrame({'A': [None, 0], 'B': [4, None]}) + >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [1, 1]}, index=[1, 2]) >>> df1.combine_first(df2) A B C 0 NaN 4.0 NaN