@@ -4894,33 +4894,104 @@ def _compare(a, b):
4894
4894
4895
4895
def combine (self , other , func , fill_value = None , overwrite = True ):
4896
4896
"""
4897
- Add two DataFrame objects and do not propagate NaN values, so if for a
4898
- (column, time) one frame is missing a value, it will default to the
4899
- other frame's value (which might be NaN as well)
4897
+ Perform column-wise combine with another DataFrame based on a
4898
+ passed function.
4899
+
4900
+ Combines a DataFrame with `other` DataFrame using `func`
4901
+ to element-wise combine columns. The row and column indexes of the
4902
+ resulting DataFrame will be the union of the two.
4900
4903
4901
4904
Parameters
4902
4905
----------
4903
4906
other : DataFrame
4907
+ The DataFrame to merge column-wise.
4904
4908
func : function
4905
4909
Function that takes two series as inputs and returns a Series or a
4906
- scalar
4907
- fill_value : scalar value
4910
+ scalar. Used to merge the two dataframes column by column.
4911
+ fill_value : scalar value, default None
4912
+ The value to fill NaNs with prior to passing any column to the
4913
+ merge func.
4908
4914
overwrite : boolean, default True
4909
- If True then overwrite values for common keys in the calling frame
4915
+ If True, columns in `self` that do not exist in `other` will be
4916
+ overwritten with NaNs.
4910
4917
4911
4918
Returns
4912
4919
-------
4913
4920
result : DataFrame
4914
4921
4915
4922
Examples
4916
4923
--------
4917
- >>> df1 = DataFrame({'A': [0, 0], 'B': [4, 4]})
4918
- >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]})
4919
- >>> df1.combine(df2, lambda s1, s2: s1 if s1.sum() < s2.sum() else s2)
4924
+ Combine using a simple function that chooses the smaller column.
4925
+
4926
+ >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [4, 4]})
4927
+ >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]})
4928
+ >>> take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2
4929
+ >>> df1.combine(df2, take_smaller)
4920
4930
A B
4921
4931
0 0 3
4922
4932
1 0 3
4923
4933
4934
+ Example using a true element-wise combine function.
4935
+
4936
+ >>> df1 = pd.DataFrame({'A': [5, 0], 'B': [2, 4]})
4937
+ >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]})
4938
+ >>> df1.combine(df2, np.minimum)
4939
+ A B
4940
+ 0 1 2
4941
+ 1 0 3
4942
+
4943
+ Using `fill_value` fills Nones prior to passing the column to the
4944
+ merge function.
4945
+
4946
+ >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [None, 4]})
4947
+ >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]})
4948
+ >>> df1.combine(df2, take_smaller, fill_value=-5)
4949
+ A B
4950
+ 0 0 -5.0
4951
+ 1 0 4.0
4952
+
4953
+ However, if the same element in both dataframes is None, that None
4954
+ is preserved.
4955
+
4956
+ >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [None, 4]})
4957
+ >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [None, 3]})
4958
+ >>> df1.combine(df2, take_smaller, fill_value=-5)
4959
+ A B
4960
+ 0 0 NaN
4961
+ 1 0 3.0
4962
+
4963
+ Example that demonstrates the use of `overwrite` and behavior when
4964
+ the axes differ between the dataframes.
4965
+
4966
+ >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [4, 4]})
4967
+ >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [-10, 1],}, index=[1, 2])
4968
+ >>> df1.combine(df2, take_smaller)
4969
+ A B C
4970
+ 0 NaN NaN NaN
4971
+ 1 NaN 3.0 -10.0
4972
+ 2 NaN 3.0 1.0
4973
+
4974
+ >>> df1.combine(df2, take_smaller, overwrite=False)
4975
+ A B C
4976
+ 0 0.0 NaN NaN
4977
+ 1 0.0 3.0 -10.0
4978
+ 2 NaN 3.0 1.0
4979
+
4980
+ Demonstrating the preference of the passed in dataframe.
4981
+
4982
+ >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [1, 1],}, index=[1, 2])
4983
+ >>> df2.combine(df1, take_smaller)
4984
+ A B C
4985
+ 0 0.0 NaN NaN
4986
+ 1 0.0 3.0 NaN
4987
+ 2 NaN 3.0 NaN
4988
+
4989
+ >>> df2.combine(df1, take_smaller, overwrite=False)
4990
+ A B C
4991
+ 0 0.0 NaN NaN
4992
+ 1 0.0 3.0 1.0
4993
+ 2 NaN 3.0 1.0
4994
+
4924
4995
See Also
4925
4996
--------
4926
4997
DataFrame.combine_first : Combine two DataFrame objects and default to
@@ -4940,7 +5011,6 @@ def combine(self, other, func, fill_value=None, overwrite=True):
4940
5011
# sorts if possible
4941
5012
new_columns = this .columns .union (other .columns )
4942
5013
do_fill = fill_value is not None
4943
-
4944
5014
result = {}
4945
5015
for col in new_columns :
4946
5016
series = this [col ]
@@ -4992,27 +5062,41 @@ def combine(self, other, func, fill_value=None, overwrite=True):
4992
5062
4993
5063
def combine_first (self , other ):
4994
5064
"""
4995
- Combine two DataFrame objects and default to non-null values in frame
4996
- calling the method. Result index columns will be the union of the
4997
- respective indexes and columns
5065
+ Update null elements with value in the same location in `other`.
5066
+
5067
+ Combine two DataFrame objects by filling null values in one DataFrame
5068
+ with non-null values from the other DataFrame. The row and column indexes
5069
+ of the resulting DataFrame will be the union of the two.
4998
5070
4999
5071
Parameters
5000
5072
----------
5001
5073
other : DataFrame
5074
+ Provided DataFrame to use to fill null values.
5002
5075
5003
5076
Returns
5004
5077
-------
5005
5078
combined : DataFrame
5006
5079
5007
5080
Examples
5008
5081
--------
5009
- df1's values prioritized, use values from df2 to fill holes:
5010
5082
5011
- >>> df1 = pd.DataFrame([[1, np.nan]])
5012
- >>> df2 = pd.DataFrame([[3, 4]])
5083
+ >>> df1 = pd.DataFrame({'A': [None, 0], 'B': [None, 4]})
5084
+ >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]})
5085
+ >>> df1.combine_first(df2)
5086
+ A B
5087
+ 0 1.0 3.0
5088
+ 1 0.0 4.0
5089
+
5090
+ Null values still persist if the location of that null value
5091
+ does not exist in `other`.
5092
+
5093
+ >>> df1 = pd.DataFrame({'A': [None, 0], 'B': [4, None]})
5094
+ >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [1, 1]}, index=[1, 2])
5013
5095
>>> df1.combine_first(df2)
5014
- 0 1
5015
- 0 1 4.0
5096
+ A B C
5097
+ 0 NaN 4.0 NaN
5098
+ 1 0.0 3.0 1.0
5099
+ 2 NaN 3.0 1.0
5016
5100
5017
5101
See Also
5018
5102
--------
0 commit comments