@@ -896,6 +896,72 @@ def makeCustomDataframe(nrows, ncols, c_idx_names=True, r_idx_names=True,
896
896
return DataFrame (data , index , columns , dtype = dtype )
897
897
898
898
899
def _create_missing_idx(nrows, ncols, density, random_state=None):
    """
    Pick random, distinct (row, column) positions to be marked as missing.

    Parameters
    ----------
    nrows, ncols : int
        Shape of the frame the positions refer to.
    density : float
        Fraction of cells, in [0, 1], that should stay non-missing.
    random_state : {np.random.RandomState, int}, optional
        Random number generator or random seed. When None, the global
        ``np.random`` state is used.

    Returns
    -------
    i, j : list of int
        Row and column positions of equal length; ``(i[k], j[k])`` is the
        k-th cell to blank out.

    Raises
    ------
    ValueError
        If `density` is outside [0, 1].
    """
    # a negative density asks for more unique cells than exist, which
    # would keep the resampling loop below spinning forever
    if not 0 <= density <= 1:
        raise ValueError("density must be in [0, 1], got %r" % (density,))

    if random_state is None:
        random_state = np.random
    elif not isinstance(random_state, np.random.RandomState):
        # anything that is not already a generator is treated as a seed
        random_state = np.random.RandomState(random_state)

    # below is cribbed from scipy.sparse
    size = int(np.round((1 - density) * nrows * ncols))
    # generate a few more to ensure unique values
    min_rows = 5
    fac = 1.02
    extra_size = min(size + min_rows, fac * size)

    def _gen_unique_rand(rng, _extra_size):
        ind = rng.rand(int(_extra_size))
        return np.unique(np.floor(ind * nrows * ncols))[:size]

    ind = _gen_unique_rand(random_state, extra_size)
    while ind.size < size:
        # too many collisions: draw a slightly larger sample and retry
        extra_size *= 1.05
        ind = _gen_unique_rand(random_state, extra_size)

    # decode the flat cell number into (row, column); cast to int because
    # np.floor yields floats, which are not valid positional indexers
    j = np.floor(ind * 1. / nrows).astype(int)
    i = (ind - j * nrows).astype(int)
    return i.tolist(), j.tolist()
924
+
925
+
926
def makeMissingCustomDataframe(nrows, ncols, density=.9, random_state=None,
                               c_idx_names=True, r_idx_names=True,
                               c_idx_nlevels=1, r_idx_nlevels=1,
                               data_gen_f=None,
                               c_ndupe_l=None, r_ndupe_l=None, dtype=None,
                               c_idx_type=None, r_idx_type=None):
    """
    Build a frame with makeCustomDataframe and blank out random cells.

    Parameters
    ----------
    density : float, optional
        Float in (0, 1) that gives the fraction of non-missing cells in
        the DataFrame.
    random_state : {np.random.RandomState, int}, optional
        Random number generator or random seed.

    See makeCustomDataframe for descriptions of the rest of the parameters.

    Returns
    -------
    DataFrame
        The generated frame with the selected cells set to NaN.
    """
    df = makeCustomDataframe(nrows, ncols, c_idx_names=c_idx_names,
                             r_idx_names=r_idx_names,
                             c_idx_nlevels=c_idx_nlevels,
                             r_idx_nlevels=r_idx_nlevels,
                             data_gen_f=data_gen_f,
                             c_ndupe_l=c_ndupe_l, r_ndupe_l=r_ndupe_l,
                             dtype=dtype, c_idx_type=c_idx_type,
                             r_idx_type=r_idx_type)

    i, j = _create_missing_idx(nrows, ncols, density, random_state)
    # The positions are paired cell by cell. ``df.iloc[i, j]`` would instead
    # select every row in ``i`` crossed with every column in ``j`` and blank
    # out far more cells than requested, so mark the pairs in a boolean
    # array (numpy pairs up two index arrays) and mask through it.
    missing = np.zeros(df.shape, dtype=bool)
    missing[np.asarray(i, dtype=int), np.asarray(j, dtype=int)] = True
    return df.mask(missing)
955
+
956
+
957
def makeMissingDataframe(density=.9, random_state=None):
    """
    Build a frame with makeDataFrame and blank out random cells.

    Parameters
    ----------
    density : float, optional
        Float in (0, 1) that gives the fraction of non-missing cells in
        the DataFrame.
    random_state : {np.random.RandomState, int}, optional
        Random number generator or random seed.

    Returns
    -------
    DataFrame
        The generated frame with the selected cells set to NaN.
    """
    df = makeDataFrame()
    i, j = _create_missing_idx(*df.shape, density=density,
                               random_state=random_state)
    # The positions are paired cell by cell. ``df.iloc[i, j]`` would instead
    # select every row in ``i`` crossed with every column in ``j`` and blank
    # out far more cells than requested, so mark the pairs in a boolean
    # array (numpy pairs up two index arrays) and mask through it.
    missing = np.zeros(df.shape, dtype=bool)
    missing[np.asarray(i, dtype=int), np.asarray(j, dtype=int)] = True
    return df.mask(missing)
963
+
964
+
899
965
def add_nans (panel ):
900
966
I , J , N = panel .shape
901
967
for i , item in enumerate (panel .items ):
0 commit comments